{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gc\n",
    "import time\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from scipy.sparse import csr_matrix, hstack\n",
    "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
    "from sklearn.preprocessing import LabelBinarizer\n",
    "from sklearn.model_selection import train_test_split, cross_val_score\n",
    "from sklearn.metrics import mean_squared_error\n",
    "import lightgbm as lgb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# The Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Split the dataset into train and test sets. We use only the training set for EDA."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the tab-separated listings file into one DataFrame.\n",
    "df = pd.read_csv('train.tsv', delimiter='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seeded generator: without a fixed seed the ~80/20 row split (and every\n",
    "# statistic and plot derived from it) changes on each kernel restart.\n",
    "split_rng = np.random.RandomState(42)\n",
    "msk = split_rng.rand(len(df)) < 0.8\n",
    "train = df[msk]   # rows whose uniform draw fell below 0.8\n",
    "test = df[~msk]   # the complementary rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((1185866, 8), (296669, 8))"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape, test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# EDA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>train_id</th>\n",
       "      <th>name</th>\n",
       "      <th>item_condition_id</th>\n",
       "      <th>category_name</th>\n",
       "      <th>brand_name</th>\n",
       "      <th>price</th>\n",
       "      <th>shipping</th>\n",
       "      <th>item_description</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>MLB Cincinnati Reds T Shirt Size XL</td>\n",
       "      <td>3</td>\n",
       "      <td>Men/Tops/T-shirts</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>1</td>\n",
       "      <td>No description yet</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Razer BlackWidow Chroma Keyboard</td>\n",
       "      <td>3</td>\n",
       "      <td>Electronics/Computers &amp; Tablets/Components &amp; P...</td>\n",
       "      <td>Razer</td>\n",
       "      <td>52.0</td>\n",
       "      <td>0</td>\n",
       "      <td>This keyboard is in great condition and works ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>AVA-VIV Blouse</td>\n",
       "      <td>1</td>\n",
       "      <td>Women/Tops &amp; Blouses/Blouse</td>\n",
       "      <td>Target</td>\n",
       "      <td>10.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Adorable top with a hint of lace and a key hol...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Leather Horse Statues</td>\n",
       "      <td>1</td>\n",
       "      <td>Home/Home Décor/Home Décor Accents</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>New with tags. Leather horses. Retail for [rm]...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>Bundled items requested for Ruie</td>\n",
       "      <td>3</td>\n",
       "      <td>Women/Other/Other</td>\n",
       "      <td>NaN</td>\n",
       "      <td>59.0</td>\n",
       "      <td>0</td>\n",
       "      <td>Banana republic bottoms, Candies skirt with ma...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   train_id                                 name  item_condition_id  \\\n",
       "0         0  MLB Cincinnati Reds T Shirt Size XL                  3   \n",
       "1         1     Razer BlackWidow Chroma Keyboard                  3   \n",
       "2         2                       AVA-VIV Blouse                  1   \n",
       "3         3                Leather Horse Statues                  1   \n",
       "5         5     Bundled items requested for Ruie                  3   \n",
       "\n",
       "                                       category_name brand_name  price  \\\n",
       "0                                  Men/Tops/T-shirts        NaN   10.0   \n",
       "1  Electronics/Computers & Tablets/Components & P...      Razer   52.0   \n",
       "2                        Women/Tops & Blouses/Blouse     Target   10.0   \n",
       "3                 Home/Home Décor/Home Décor Accents        NaN   35.0   \n",
       "5                                  Women/Other/Other        NaN   59.0   \n",
       "\n",
       "   shipping                                   item_description  \n",
       "0         1                                 No description yet  \n",
       "1         0  This keyboard is in great condition and works ...  \n",
       "2         1  Adorable top with a hint of lace and a key hol...  \n",
       "3         1  New with tags. Leather horses. Retail for [rm]...  \n",
       "5         0  Banana republic bottoms, Candies skirt with ma...  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 1185866 entries, 0 to 1482533\n",
      "Data columns (total 8 columns):\n",
      "train_id             1185866 non-null int64\n",
      "name                 1185866 non-null object\n",
      "item_condition_id    1185866 non-null int64\n",
      "category_name        1180783 non-null object\n",
      "brand_name           679496 non-null object\n",
      "price                1185866 non-null float64\n",
      "shipping             1185866 non-null int64\n",
      "item_description     1185863 non-null object\n",
      "dtypes: float64(1), int64(3), object(4)\n",
      "memory usage: 81.4+ MB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    1.185866e+06\n",
       "mean     2.674116e+01\n",
       "std      3.852606e+01\n",
       "min      0.000000e+00\n",
       "25%      1.000000e+01\n",
       "50%      1.700000e+01\n",
       "75%      2.900000e+01\n",
       "max      2.009000e+03\n",
       "Name: price, dtype: float64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.price.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5,1,'Price Distribution')"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAuoAAAGGCAYAAAAtuaQaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xu8XVV97/3P14AGr4BEi1wMampFn6oYgVNPWyqKAVSwj1SpldRi4+PBqsdeDLavA0elD/a0XqiWFgUFqyJewQpivNVjjyBBqYDoQ4ooMQjRcKsYgfh7/lhj62Kz9n3vrLlXPu/Xa732Wr855ly/mZ2M9ctYY46ZqkKSJElSt9xv2AlIkiRJui8LdUmSJKmDLNQlSZKkDrJQlyRJkjrIQl2SJEnqIAt1SZIkqYMs1DUyklyUZPUQ3vclST47j8e7Oskh7fnJSf55Ho/9hiTvma/jSdJc2G9P69j22zuwuI66uirJ9cAjgW3AT4ALgT+pqv/cjjm8D/h94Gct9D3gU8CpVXXbLI61sar+agb7nAw8rqr+YCbv1fY9BPjnqtp7pvtK0mzYb9tva345oq6ue15VPRg4AHg6cJ/OMj0L+Xf5b6rqIcAy4GXAwcC/JXnQfL5Jkp3m83iSNCT229I8sVDXolBVPwAuAp4EkORLSU5J8m/AncBjWuzlY/sk+eMk1yS5I8m3khzQ4o9K8rEkm5N8N8mrp5nD1qq6DHg+8HB6nT9J/jDJV9rzJHlbkpuT3Jbkm0melGQN8BLgL5L8Z5JPtfbXJ3l9km8CP0myU4s9q++tlyb5cDuPryd5ct85VpLH9b1+X5I3tw+ji4BHtff7z3be9/pKNsnz21e2t7Y/vyf0bbs+yZ+1c7it5bB0On9WkmS/bb+tubNQ16KQZB/gCOAbfeGXAmuAh9D7arO//THAycBxwEPpddI/biM4nwL+HdgLOBR4bZLnTDeXqroDWAf85oDNhwG/BfwqsCvwIuDHVXUG8AF6ozwPrqrn9e1zLHAksGtV3TPgmEcBHwF2Bz4IfDLJzlPk+BPgcGBTe78HV9Wm/jZJfhX4EPBaeqNOFwKfSnL/vma/B6wC9gN+HfjDyd5XksbYb9tva+4s1NV1n0xyK/AV4F+Bv+7b9r6qurqq7qmqu8ft93J6netl1bOhqr5H72vYZVX1xqq6q6quA94NvHiGeW2i1wGPdze9D6Bfo3cNyDVVdeMUxzqtqm6oqp9OsP3yqvpoO8e3AkvpfY07Vy8CPl1V69qx/xbYBfiNcbltqqot9D4onzIP7ytptNlv229rnji3Sl13dFV9boJtN0yy3z7AfwyIP5re14q39sWWAP97hnntBWwZH6yqLyR5J/AuYN8knwD+rKpun+RYk53HvbZX1c+TbAQeNcN8B3kUfSNa7dg30Du3MT/se37nPL2vpNFmv22/rXniiLoWs8mWLLoBeOwE8e9W1a59j4dU1RHTfdMkDwaexQQfElV1WlU9DXgiva9S/3yKfKdaemmfvve+H7A3vZEh6HXCD+xr+yszOO4meh+AY8dOe68fTLGfJM2W/bb9tmbAQl2j6j3AnyV5WrtQ6HFJHg18Dbi9XQi0S5Il7aKhp091wCQPSPI04JPALcB7B7R5epKD2lzEnwBb6S1TBnAT8JhZnMvTkvxueqsLvJbekmOXtG1XAL/fzmMV8Nt9+90EPDzJwyY47nnAkUkObfn+aTv2/5lFjpI0V/bb9tsax0JdI6mqPgKcQu8injvoddK7V9U24Hn05ux9F/gRvQ+HiTpF6F3xfwe9r0zPAS4HfqNd+DPeQ+nNnbyF3teTP6Y3hxDgTGD/dqX+J2dwOufTm5d4C70LsX63b27na9r53EpvdYJfHLeqvk3voqPr2nve6+vPqvoO8AfA39P7c3gevWXV7ppBbpI0L+y37bd1X97wSJIkSeogR9QlSZKkDrJQlyRJ
kjrIQl2SJEnqIAt1SZIkqYMs1CVJkqQO8s6kzR577FHLly8fdhqSNGOXX375j6pq2bDz2J7ssyUtZtPtty3Um+XLl7N+/fphpyFJM5bke1O3Gi322ZIWs+n22059kSRJkjrIQl2SJEnqIAt1SZIkqYMs1CVJkqQOslCXJEmSOshCXZIkSeogC3VJkiSpgyzUJUmSpA6yUJckSZI6yEJdkiRJ6iALdUmSJKmDLNQlSZKkDrJQlyRJkjrIQn072nr3tmnFJEnquok+v/xck+bPTsNOYEeydOclLF/76XvFrj/1yCFlI0nS7A36TAM/16T55Ii6JEmS1EEW6pIkSVIHWahLkiRJHWShLkmSJHWQhbokSZLUQRbqkiRJUgdZqEuSJEkdZKEuSZIkdZCFuiRJktRBFuqSJElSB1moS5IkSR1koS5JkiR10IIV6kn2SfLFJNckuTrJa1r85CQ/SHJFexzRt8+JSTYk+U6S5/TFV7XYhiRr++L7Jbk0ybVJPpzk/i3+gPZ6Q9u+fKHOU5IkSVoICzmifg/wp1X1BOBg4IQk+7dtb6uqp7THhQBt24uBJwKrgH9IsiTJEuBdwOHA/sCxfcd5SzvWCuAW4PgWPx64paoeB7yttZMkSZIWjQUr1Kvqxqr6ent+B3ANsNckuxwFnFtVP6uq7wIbgAPbY0NVXVdVdwHnAkclCfBM4KNt/7OBo/uOdXZ7/lHg0NZekiRJWhS2yxz1NvXkqcClLfSqJN9MclaS3VpsL+CGvt02tthE8YcDt1bVPePi9zpW235bay9JkiQtCgteqCd5MPAx4LVVdTtwOvBY4CnAjcDfjTUdsHvNIj7ZscbntibJ+iTrN2/ePOl5SJIkSdvTghbqSXamV6R/oKo+DlBVN1XVtqr6OfBuelNboDcivk/f7nsDmyaJ/wjYNclO4+L3Olbb/jBgy/j8quqMqlpZVSuXLVs219OVJEmS5s1CrvoS4Ezgmqp6a198z75mLwCuas8vAF7cVmzZD1gBfA24DFjRVni5P70LTi+oqgK+CLyw7b8aOL/vWKvb8xcCX2jtJUmSpEVhIUfUnwG8FHjmuKUY/ybJlUm+CfwO8N8Bqupq4DzgW8BngBPayPs9wKuAi+ldkHpeawvweuB1STbQm4N+ZoufCTy8xV8H/GJJR0nSYO26oZuTXNUX+19Jvt2uK/pEkl37trmkriQtoJ2mbjI7VfUVBs8Vv3CSfU4BThkQv3DQflV1Hb+cOtMf3wocM5N8JUm8D3gncE5fbB1wYlXdk+QtwInA68ctqfso4HNJfrXt8y7g2fSmIV6W5IKq+ha/XFL33CT/SG8p3dPpW1I3yYtbuxct8LlKUud5Z1JJEgBV9WXGXc9TVZ/tW13rEnrXA4FL6krSgrNQlyRN1x8BF7XnLqkrSQvMQl2SNKUkf0nvjtMfGAsNaOaSupI0jyzUJUmTSrIaeC7wkr4VtFxSV5IWmIW6JGlCSVbRW2Hr+VV1Z98ml9SVpAW2YKu+SJIWlyQfAg4B9kiyETiJ3iovDwDWtes7L6mq/6eqrk4ytqTuPbQlddtxxpbUXQKcNW5J3XOTvBn4BvdeUvf9bUndLfSKe0na4VmoS5IAqKpjB4TPHBAba++SupK0gJz6IkmSJHWQhbokSZLUQRbqkiRJUgdZqEuSJEkdZKEuSZIkdZCFuiRJktRBFuqSJElSB1moS5IkSR1koS5JkiR1kIW6JEmS1EEW6pIkSVIHWahLkiRJHWShLkmSJHWQhbokSZLUQRbqkiRJUgdZqEuSJEkdZKEuSZIkdZCFuiRJktRBFuqSJElSB1moS5IkSR1koS5JkiR1kIW6JEmS1EEW6pIkSVIHWahLkiRJHWShLkmSJHWQhbokSZLUQRbqkiRJUgdZqEuSJEkdZKEuSZIkdZCFuiRJktRBFuqSJElSB1moS5IkSR1koS5JkiR1kIW6JEmS1EEW6pIkSVIHWahLkiRJHWShLkmSJHWQhbok
SZLUQRbqkiSp87bevW1aMWmUWKhLkgBIclaSm5Nc1RfbPcm6JNe2n7u1eJKclmRDkm8mOaBvn9Wt/bVJVvfFn5bkyrbPaUky2XtI/ZbuvITlaz99r8fSnZcMOy1pQVmoS5LGvA9YNS62Fvh8Va0APt9eAxwOrGiPNcDp0Cu6gZOAg4ADgZP6Cu/TW9ux/VZN8R6StEOzUJckAVBVXwa2jAsfBZzdnp8NHN0XP6d6LgF2TbIn8BxgXVVtqapbgHXAqrbtoVX11aoq4Jxxxxr0HpK0Q7NQlyRN5pFVdSNA+/mIFt8LuKGv3cYWmyy+cUB8sve4lyRrkqxPsn7z5s1zOilJWgwWrFBPsk+SLya5JsnVSV7T4s53lKTFLwNiNYv4tFXVGVW1sqpWLlu2bCa7StKitJAj6vcAf1pVTwAOBk5Isj/Od5SkxeSmNm2F9vPmFt8I7NPXbm9g0xTxvQfEJ3sPjThXcpEmt2CFelXdWFVfb8/vAK6h9zWn8x0lafG4ABj7JnM1cH5f/Lj2bejBwG1t2srFwGFJdmuDKocBF7dtdyQ5uH37edy4Yw16D404V3KRJrfT9niTJMuBpwKXMm4uYpIFn+/Y9x7j81pDb0Sefffdd5ZnJ0mjIcmHgEOAPZJspPdt5qnAeUmOB74PHNOaXwgcAWwA7gReBlBVW5K8CbistXtjVY1doPpKeivL7AJc1B5M8h6StENb8EI9yYOBjwGvrarb2zTygU0HxBZ8viNwBsDKlStntK8kjZqqOnaCTYcOaFvACRMc5yzgrAHx9cCTBsR/POg9JGlHt6CrviTZmV6R/oGq+ngLO99RkiRJmsJCrvoS4Ezgmqp6a98m5ztKkiRJU1jIqS/PAF4KXJnkihZ7A853lCRJkqa0YIV6VX2FwfPIwfmOkiRJ0qS8M6kkSZLUQRbqkiRJUgdZqEuSpHnj3Ual+bNdbni0o9l69zbvrCZJ2iGN3W203/WnHjmkbKTFzUJ9AQzqpMCOSpIkSdPn1BdJkrQoTTSlxqk2GhWOqEuSpEXJb7A16hxRlyRJkjrIQl2SJEnqIAt1SZIkqYMs1CVJkqQOslCXJEmSOshCXZIkSeogC3VJkrSgXNdcmh3XUZckSQvK9c6l2XFEXZIkSeogC3VJkiSpgyzUJUmSpA6yUB+yiS6w8cIbSZKkHZsXkw6ZF9hIkiRpEEfUJUmSpA6yUJckSZI6yEJdkiRJ6iALdUmSJKmDLNQlSZKkDrJQlyRJkjrIQl2SJEnqIAt1SZIkqYMs1CVJkqQOslCXJEmSOshCXZIkSeogC3VJkiSpgyzUJUmSpA6yUJckSZI6yEJdkiRJ6iALdUnSlJL89yRXJ7kqyYeSLE2yX5JLk1yb5MNJ7t/aPqC93tC2L+87zokt/p0kz+mLr2qxDUnWbv8zlKTusVCXJE0qyV7Aq4GVVfUkYAnwYuAtwNuqagVwC3B82+V44JaqehzwttaOJPu3/Z4IrAL+IcmSJEuAdwGHA/sDx7a2krRDs1CXJE3HTsAuSXYCHgjcCDwT+GjbfjZwdHt+VHtN235okrT4uVX1s6r6LrABOLA9NlTVdVV1F3Bua6sd0Na7tw07Bakzdhp2ApKkbquqHyT5W+D7wE+BzwKXA7dW1T2t2UZgr/Z8L+CGtu89SW4DHt7il/Qdun+fG8bFDxqfR5I1wBqAfffdd+4npk5auvMSlq/99H3i15965BCykYbLEXVJ0qSS7EZvhHs/4FHAg+hNUxmvxnaZYNtM4/cOVJ1RVSurauWyZcumk7okLWoW6pKkqTwL+G5Vba6qu4GPA78B7NqmwgDsDWxqzzcC+wC07Q8DtvTHx+0zUVySdmgW6pKkqXwfODjJA9tc80OBbwFfBF7Y2qwGzm/PL2ivadu/UFXV4i9uq8LsB6wAvgZcBqxoq8jcn94Fpxdsh/OSpE6bVqGe5EkLnYgkaX7Md59dVZfSuyj068CV9D47zgBeD7wuyQZ6c9DPbLuc
CTy8xV8HrG3HuRo4j16R/xnghKra1ua5vwq4GLgGOK+1laQd2nQvJv3HNsrxPuCDVXXrwqUkSZqjee+zq+ok4KRx4evordgyvu1W4JgJjnMKcMqA+IXAhXPNU5JGybRG1KvqvwIvoTeHcH2SDyZ59oJmJkmaFftsSRoN056jXlXXAn9F76vO3wZOS/LtJL+7UMlJkmbHPluSFr/pzlH/9SRvozd38JnA86rqCe352xYwP0nSDNlnS9JomO4c9XcC7wbeUFU/HQtW1aYkf7UgmUmSZss+W5JGwHQL9SOAn1bVNoAk9wOWVtWdVfX+BctOkjQb9tmSNAKmO0f9c8Aufa8f2GKSpO6xz5akETDdQn1pVf3n2Iv2/IGT7ZDkrCQ3J7mqL3Zykh8kuaI9jujbdmKSDUm+k+Q5ffFVLbYhydq++H5JLk1ybZIPt6XIaDfS+HBrf2mS5dM8R0kaFTPusyVJ3TPdQv0nSQ4Ye5HkacBPJ2kPvfV7Vw2Iv62qntIeF7bj7U/vTnRPbPv8Q5IlSZYA7wIOB/YHjm1tAd7SjrUCuAU4vsWPB26pqsfRu2jqLdM8R0kaFbPpsyVJHTPdOeqvBT6SZFN7vSfwosl2qKovz2A0+yjg3Kr6GfDddje7sZtobKiq6wCSnAsclWRsJYPfb23OBk4GTm/HOrnFPwq8M0na7aslaUcw4z5bktQ90yrUq+qyJL8GPB4I8O2qunuW7/mqJMcB64E/rapbgL2AS/rabGwxgBvGxQ+id6vqW9ttp8e332tsn6q6J8ltrf2PxieSZA2wBmDfffed5elIUrfMc58tSRqSad/wCHg68OvAU+lNQTluFu93OvBY4CnAjcDftXgGtK1ZxCc71n2DVWdU1cqqWrls2bLJ8pakxWY++mxJ0hBNa0Q9yfvpFdhXANtauIBzZvJmVXVT3zHfDfxLe7mR3q2ux+wNjH1lOyj+I2DXJDu1UfX+9mPH2phkJ+BhwJaZ5ClJi9l89dmSpOGa7hz1lcD+c53nnWTPqrqxvXwBMLYizAXAB5O8FXgUsAL4Gr3R8RVJ9gN+QO+C09+vqkryReCFwLnAauD8vmOtBr7atn/B+emSdjDz0mdLkoZruoX6VcCv0JuuMi1JPgQcAuyRZCNwEnBIkqfQG9m5HngFQFVdneQ84FvAPcAJfTfqeBVwMbAEOKuqrm5v8Xrg3CRvBr4BnNniZwLvbxekbqFX3EvSjmTGfbYkqXumW6jvAXwrydeAn40Fq+r5E+1QVccOCJ85IDbW/hTglAHxC4ELB8Sv45crw/THtwLHTPQ+krQDmHGfLUnqnukW6icvZBKSpHl18rATkIZp693bWLrzkiljUtdNd3nGf03yaGBFVX0uyQPpTUWRJHWMfbZ2dEt3XsLytZ++V+z6U48cUjbS7E1recYkf0zv5kH/1EJ7AZ9cqKQkSbNnny1Jo2G666ifADwDuB2gqq4FHrFQSUmS5sQ+W5JGwHQL9Z9V1V1jL9r65C77JUndZJ8tSSNguoX6vyZ5A7BLkmcDHwE+tXBpSZLmwD5bkkbAdAv1tcBm4Ep6a59fCPzVQiUlSZoT+2xJGgHTXfXl58C720OS1GH22ZI0GqZVqCf5LgPmN1bVY+Y9I0nSnNhnS9JomO4Nj1b2PV9K786fu89/OpKkeWCfLUkjYFpz1Kvqx32PH1TV24FnLnBukqRZsM+WpNEw3akvB/S9vB+90ZqHLEhGkqQ5sc+WpNEw3akvf9f3/B7geuD35j0b/cLWu7exdOclU8YkaQD7bEkaAdNd9eV3FjoR3dvSnZewfO2n7xW7/tQjh5SNpMXEPluSRsN0p768brLtVfXW+UlHkjRX9tmSNBpmsurL04EL2uvnAV8GbliIpCRJc2KfLUkjYLqF+h7AAVV1B0CSk4GPVNXLFyoxSdKs2WdL0giY1vKMwL7AXX2v7wKWz3s2kqT5YJ8tSSNguiPq7we+luQT9O529wLgnAXLSpI0F/bZ
kjQCprvqyylJLgJ+s4VeVlXfWLi0JEmzZZ8tSaNhulNfAB4I3F5V7wA2JtlvgXKSJM2dfbYkLXLTKtSTnAS8HjixhXYG/nmhkpIkzZ59tiSNhumOqL8AeD7wE4Cq2oS3o5akrrLPlqQRMN1C/a6qKnoXJZHkQQuXkiRpjua9z06ya5KPJvl2kmuS/JckuydZl+Ta9nO31jZJTkuyIck3kxzQd5zVrf21SVb3xZ+W5Mq2z2lJMtecJWmxm26hfl6SfwJ2TfLHwOeAdy9cWpKkOViIPvsdwGeq6teAJwPXAGuBz1fVCuDz7TXA4cCK9lgDnA6QZHfgJOAg4EDgpLHivrVZ07ffqjnmK0mL3nRXffnbJM8GbgceD/yPqlq3oJlJkmZlvvvsJA8Ffgv4w3b8u4C7khwFHNKanQ18id7c+KOAc9qo/iVtNH7P1nZdVW1px10HrEryJeChVfXVFj8HOBq4aLY5S9IomLJQT7IEuLiqngVYnEtShy1Qn/0YYDPw3iRPBi4HXgM8sqpuBKiqG5M8orXfC7ihb/+NLTZZfOOAuCTt0Kac+lJV24A7kzxsO+QjSZqDBeqzdwIOAE6vqqfSu0h17STtB80vr1nE733QZE2S9UnWb968eeqsJWmRm+6dSbcCV7avKX8yFqyqVy9IVpKkuZjvPnsjsLGqLm2vP0qvUL8pyZ5tNH1P4Oa+9vv07b83sKnFDxkX/1KL7z2g/b1U1RnAGQArV668TyEvSaNmuoX6p9tDktR989pnV9UPk9yQ5PFV9R3gUOBb7bEaOLX9PL/tcgHwqiTn0rtw9LZWzF8M/HXfBaSHASdW1ZYkdyQ5GLgUOA74+/nKX5IWq0kL9ST7VtX3q+rs7ZWQJGl2FrjP/hPgA0nuD1wHvIze9MnzkhwPfB84prW9EDgC2ADc2drSCvI3AZe1dm8cu7AUeCXwPmAXeheReiGppB3eVCPqn6Q3L5EkH6uq/3vhU5IkzdKC9dlVdQWwcsCmQwe0LeCECY5zFnDWgPh64ElzTFOSRspUF5P2X+DzmIVMRJI0Z/bZkjRCpirUa4LnkqTusc+WpBEy1dSXJye5nd4ozS7tOe11VdVDFzQ7SdJM2GdL0giZtFCvqiXbKxFJ0tzYZ0vSaJnyhkeSJEmStj8LdUmSJKmDLNQlSZKkDrJQlyRJkjrIQl2SJEnqIAt1SZIkqYMs1CVJkqQOslCXJEmSOshCXZIkSeogC3VJkiSpgyzUJUmSpA6yUJckSZI6yEJdkiRJ6qAFK9STnJXk5iRX9cV2T7IuybXt524tniSnJdmQ5JtJDujbZ3Vrf22S1X3xpyW5su1zWpJM9h6SJEnSYrKQI+rvA1aNi60FPl9VK4DPt9cAhwMr2mMNcDr0im7gJOAg4EDgpL7C+/TWdmy/VVO8hyRJ2kFtvXvbjOJSF+y0UAeuqi8nWT4ufBRwSHt+NvAl4PUtfk5VFXBJkl2T7NnarquqLQBJ1gGrknwJeGhVfbXFzwGOBi6a5D0kSdIOaunOS1i+9tP3iV9/6pFDyEaanu09R/2RVXUjQPv5iBbfC7ihr93GFpssvnFAfLL3kCRJM+SIszQ8CzaiPkMZEKtZxGf2pskaetNn2HfffWe6uyRJI8+RaGl4tveI+k1tSgvt580tvhHYp6/d3sCmKeJ7D4hP9h73UVVnVNXKqlq5bNmyWZ+UJEmSNN+2d6F+ATC2cstq4Py++HFt9ZeDgdvatJWLgcOS7NYuIj0MuLhtuyPJwW21l+PGHWvQe0iSJEmLxoJNfUnyIXoXde6RZCO91VtOBc5LcjzwfeCY1vxC4AhgA3An8DKAqtqS5E3AZa3dG8cuLAVeSW9lmV3oXUR6UYtP9B6SJEnSorGQq74cO8GmQwe0LeCECY5zFnDWgPh64EkD4j8e9B6SJEnTsfXubSzdecm049JC6crFpJIkSZ3gBbTqiu09R12SJEnSNFioS5IkSR1koS5JkiR1kIW6JEmS1EEW
6pIkSVIHWagvIlvv3jajuCRJkhYvl2dcRFwuSpIkacfhiLokSZLUQRbqkiRJUgdZqEuSJEkdZKEuSZIkdZCFuiRJAlxFTOoaV32RJE0pyRJgPfCDqnpukv2Ac4Hdga8DL62qu5I8ADgHeBrwY+BFVXV9O8aJwPHANuDVVXVxi68C3gEsAd5TVadu15PTLwxaXcyVxaThcURdkjQdrwGu6Xv9FuBtVbUCuIVeAU77eUtVPQ54W2tHkv2BFwNPBFYB/5BkSfsPwLuAw4H9gWNbW2m78FsEdZkj6pKkSSXZGzgSOAV4XZIAzwR+vzU5GzgZOB04qj0H+Cjwztb+KODcqvoZ8N0kG4ADW7sNVXVde69zW9tvLfBpSYDfIqjbHFGXJE3l7cBfAD9vrx8O3FpV97TXG4G92vO9gBsA2vbbWvtfxMftM1H8PpKsSbI+yfrNmzfP9ZwkqfMs1CVJE0ryXODmqrq8PzygaU2xbabx+warzqiqlVW1ctmyZZNkLUmjwakvkqTJPAN4fpIjgKXAQ+mNsO+aZKc2ar43sKm13wjsA2xMshPwMGBLX3xM/z4TxSVph+aIuiRpQlV1YlXtXVXL6V0M+oWqegnwReCFrdlq4Pz2/IL2mrb9C1VVLf7iJA9oK8asAL4GXAasSLJfkvu397hgO5yaJHWeI+qSpNl4PXBukjcD3wDObPEzgfe3i0W30Cu8qaqrk5xH7yLRe4ATqmobQJJXARfTW57xrKq6erueiSR1lIW6JGlaqupLwJfa8+v45aot/W22AsdMsP8p9FaOGR+/ELhwHlOVpJHg1BdJkiSpgyzUJUmSpA6yUJckSZI6yEJdkiRJ6iALdUmSJKmDLNQlSZKkDrJQlyRJkjrIQl2SJEnqIAt1SZIkqYMs1CVJkqQOslCXJEmSOshCXZIkSeogC3VJkiSpgyzUJUmSpA6yUJckSZI6yEJdkiRJ6iALdUmSJKmDLNQlSZKkDrJQHwFb7942o7gkSZK6b6dhJ6C5W7rzEpav/fR94tefeuQQspEkSdJ8cERdkiRJ6iALdUmSJKmDLNQlSZKkDrJQlyRJkjrIQl2SJEnqIAt1SZIkqYMs1CVJkqQOslCXJEmSOmgohXqS65NcmeSKJOtbbPcMqPK6AAASC0lEQVQk65Jc237u1uJJclqSDUm+meSAvuOsbu2vTbK6L/60dvwNbd9s/7OUJKmbvHO1tDgM886kv1NVP+p7vRb4fFWdmmRte/164HBgRXscBJwOHJRkd+AkYCVQwOVJLqiqW1qbNcAlwIXAKuCi7XNakiR1m3e0lhaHLk19OQo4uz0/Gzi6L35O9VwC7JpkT+A5wLqq2tKK83XAqrbtoVX11aoq4Jy+Y0mStENx9FxavIY1ol7AZ5MU8E9VdQbwyKq6EaCqbkzyiNZ2L+CGvn03tthk8Y0D4veRZA29kXf23XffuZ6TJEmdM2j03JFzaXEYVqH+jKra1IrxdUm+PUnbQfPLaxbx+wZ7/0E4A2DlypUD20iSJEnDMJSpL1W1qf28GfgEcCBwU5u2Qvt5c2u+Edinb/e9gU1TxPceEJckSZIWje1eqCd5UJKHjD0HDgOuAi4AxlZuWQ2c355fABzXVn85GLitTZG5GDgsyW5thZjDgIvbtjuSHNxWezmu71iSJEnzZqJrALw2QPNhGFNfHgl8oq2YuBPwwar6TJLLgPOSHA98Hzimtb8QOALYANwJvAygqrYkeRNwWWv3xqra0p6/EngfsAu91V5c8UWSJM3J1ru3sXTnJfeKuYKOFtJ2L9Sr6jrgyQPiPwYOHRAv4IQJjnUWcNaA+HrgSXNOVpIkqfHCXG1vXVqeUfNs0NdufhUnSZK0OAzzhkdaYP7PX5IkafFyRF2SNKkk+yT5YpJrklyd5DUtvnuSdUmubT93a/EkOS3JhiTfTHJA37FWt/bXJlndF39akivbPqe1xQAkaYdmoS5Jmso9wJ9W1ROAg4ETkuwPrAU+X1Ur
gM+31wCHAyvaYw1wOvQKe+Ak4CB6y/KeNFbctzZr+vZbtR3OS5I6zUJdkjSpqrqxqr7ent8BXEPvjs9HAWe3ZmcDR7fnRwHnVM8lwK7t/hjPAdZV1ZaqugVYB6xq2x5aVV9tCwic03csSdphWahLkqYtyXLgqcClwCPbvStoPx/Rmu0F3NC328YWmyy+cUB8/HuvSbI+yfrNmzfPx+lIUqdZqEuSpiXJg4GPAa+tqtsnazogVrOI3ztQdUZVrayqlcuWLZtOypK0qFmoS5KmlGRnekX6B6rq4y18U5u2Qvt5c4tvBPbp231vYNMU8b0HxCVph2ahLkmaVFuB5Uzgmqp6a9+mC4CxlVtWA+f3xY9rq78cDNzWpsZcDByWZLd2EelhwMVt2x1JDm7vdVzfsSRph+U66pKkqTwDeClwZZIrWuwNwKnAeUmOB74PHNO2XQgcAWwA7gReBlBVW5K8CbistXtjVW1pz18JvA/YBbioPaRFa+vd21i685Jpx6VBLNQlSZOqqq8weB45wKED2hdwwgTHOgs4a0B8PfCkOaQpdcqgmw6CNx7UzDj1RZIkSeogC/UdzNa7t80oLkmSpOFw6ssOxq/iJEmSFgdH1CVJkqQOslCXJEmSOshCXZIkSeogC3VJkhaZQQsAuCiANHq8mFSSpEVm0MIALgogjR5H1OfIEQxJkiQtBEfU58hRDUmSJC0ER9QlSZKkDrJQF+CFSZK02NlnS6PHqS8CnMIjSYudd56WRo8j6pIkSVIHWahLkiRJHWShLkmSJHWQhbokSdJ2MpPFG7xAWF5MKkmStJ1MtHiDFwJrEEfUNSH/hy9JkjQ8jqhrQi71JUmSNDyOqEuSJEkdZKEuSZIkdZCFuiRJktRBFuqSJElSB1moa8ZmsgasJEmSZsdVXzRjE60BK0mSpPnjiLokSVIH+Q22HFHXvNh69zaW7rxk2nFJkjQ5v8GWhbrmhTdHkiRp4TkwtmOxUNeCGtRx2JlIkjQ7DoztWCzUtaD82k6SJGl2vJhUkiRpkfPC09HkiLokSdIi5zfYo8kRdW13E/0P3//5S5Ik/ZIj6truJroQ5ttvWnWfmBeeSpI0O64Qs/hZqKszBhXwg4p3sJORJGkqrhCz+Fmoq9McfZckaX65dPLiYaGuRcnRd0mSZscLTxePkS3Uk6wC3gEsAd5TVacOOSUtsPkYfXc+nzQ89tvS8Pj5100jWagnWQK8C3g2sBG4LMkFVfWt4WamYZho5GCieXvTnc83k07NDlCanP22NFxONe2mkSzUgQOBDVV1HUCSc4GjADt8zcqgTmkmndpCjvZPN1//s6CO2y79tv8OpJmZ61RT/23NzagW6nsBN/S93ggcNKRcNAJmMp9vPtrOtahfqOPOx38A5nqMxdZ2In6g3cd26bddBUOau8n+HS1EUb8j/wc7VTXsHOZdkmOA51TVy9vrlwIHVtWfjGu3BljTXj4e+M4s3m4P4EdzSLfLPLfFa5TPz3O7r0dX1bL5TmZ7mk6/PaJ9dtfyge7l1LV8oHs5mc/UupbTtPrtUR1R3wjs0/d6b2DT+EZVdQZwxlzeKMn6qlo5l2N0lee2eI3y+XluI2vKfnsU++yu5QPdy6lr+UD3cjKfqXUxp+m437ATWCCXASuS7Jfk/sCLgQuGnJMkaWL225I0zkiOqFfVPUleBVxMb5mvs6rq6iGnJUmagP22JN3XSBbqAFV1IXDhdnirOX0N23Ge2+I1yufnuY2o7dRvd+3PuGv5QPdy6lo+0L2czGdqXcxpSiN5MakkSZK02I3qHHVJkiRpUbNQn6Ukq5J8J8mGJGuHnc98SHJ9kiuTXJFkfYvtnmRdkmvbz92Gned0JDkryc1JruqLDTyX9JzWfpffTHLA8DKf2gTndnKSH7Tf3RVJjujbdmI7t+8kec5wsp6eJPsk+WKSa5JcneQ1LT4qv7uJzm8kfn9d16V+e9C/42Ga6O/mkHNamuRrSf695fQ/h50T
9O6im+QbSf5l2LnA4M/uIeeza5KPJvl2+/v0X4aYy+P7+tUrktye5LXDymdWqsrHDB/0LnT6D+AxwP2Bfwf2H3Ze83Be1wN7jIv9DbC2PV8LvGXYeU7zXH4LOAC4aqpzAY4ALgICHAxcOuz8Z3FuJwN/NqDt/u3v5wOA/drf2yXDPodJzm1P4ID2/CHA/9fOYVR+dxOd30j8/rr86Fq/Pejf8ZD/fAb+3RxyTgEe3J7vDFwKHNyBP6vXAR8E/mXYubR87vPZPeR8zgZe3p7fH9h12Dm1XJYAP6S3fvnQ85nuwxH12fnFra6r6i5g7FbXo+goev/oaD+PHmIu01ZVXwa2jAtPdC5HAedUzyXArkn23D6ZztwE5zaRo4Bzq+pnVfVdYAO9v7+dVFU3VtXX2/M7gGvo3bFyVH53E53fRBbV76/jOtVvz/Df8YKbxd/N7ZFTVdV/tpc7t8dQL6xLsjdwJPCeYebRVUkeSu8/oWcCVNVdVXXrcLP6hUOB/6iq7w07kZmwUJ+dQbe6HmqHNk8K+GySy9O7AyDAI6vqRuh15MAjhpbd3E10LqPy+3xVm/5xVt8UpUV7bkmWA0+lN4o2cr+7cecHI/b76yD/LKdpwN/NoWnTTK4AbgbWVdWwc3o78BfAz4ecR79Bn93D8hhgM/DeNj3oPUkeNOScxrwY+NCwk5gpC/XZyYDYKCyf84yqOgA4HDghyW8NO6HtZBR+n6cDjwWeAtwI/F2LL8pzS/Jg4GPAa6vq9smaDogtxvMbqd9fR/lnOQ0z+Le3XVTVtqp6Cr071R6Y5EnDyiXJc4Gbq+ryYeUwgS59du9Eb0rX6VX1VOAn9KYrDlV6N1F7PvCRYecyUxbqszPlra4Xo6ra1H7eDHyC3lfFN41NJWg/bx5ehnM20bks+t9nVd3UPtB+DrybX06PWHTnlmRneoXCB6rq4y08Mr+7Qec3Sr+/DvPPcgoT/NvrhDZ94kvAqiGm8Qzg+Umupzd16plJ/nmI+QATfnYPy0ZgY983Hx+lV7gP2+HA16vqpmEnMlMW6rMzcre6TvKgJA8Zew4cBlxF77xWt2argfOHk+G8mOhcLgCOayuIHAzcNjbNYrEYNy/7BfR+d9A7txcneUCS/YAVwNe2d37TlST05jZeU1Vv7ds0Er+7ic5vVH5/HTdy/fZ8muTf3tAkWZZk1/Z8F+BZwLeHlU9VnVhVe1fVcnp/f75QVX8wrHxg0s/uoaiqHwI3JHl8Cx0KfGtY+fQ5lkU47QVG+M6kC6lG81bXjwQ+0eur2Qn4YFV9JsllwHlJjge+DxwzxBynLcmHgEOAPZJsBE4CTmXwuVxIb/WQDcCdwMu2e8IzMMG5HZLkKfS+yr8eeAVAVV2d5Dx6HeU9wAlVtW0YeU/TM4CXAle2eakAb2BEfndMfH7Hjsjvr7O61m8P+ndcVWcOKx8m+LtZvbvFDsuewNlJltAbWDyvqjqxJGKHDPzsHm5K/AnwgfYf4usYcr+c5IHAs2n96mLjnUklSZKkDnLqiyRJktRBFuqSJElSB1moS5IkSR1koS5JkiR1kIW6JEmS1EEW6tICSfKbSb4z7DwkqeuSXJ/kWfNwnMOSfHIO+78kyWfnmsd8SPLqJKcOOw8Nl8szSpKkoWp3+3x5VX1ujsdZD7yqqi6Zl8QWWJIzgN+mdzOzP6qq9/VtW0rvHhEHtLuOagfkiLq0AJJ4MzFJ2o6SPB142GyL9IXqt5N8KckhE2z+d+C/AV8fv6GqtgIXAcctRF5aHCzUpRloX8+emORbSW5J8t4kS5MckmRjktcn+SHw3rFY3777JPl4ks1JfpzknX3b/ijJNe2YFyd59FBOUJKGKMkDkrw9yab2eHuSB/Rt/4skN7ZtL09SSR7XNh8O/Ou441WbQnJdkh8l+V9J7te2/WGSf0vytiRbgJNb7Ct9+z8xybokW5LclOQNLX6/JGuT/Efrz89LsvtMz7eq3lVVnwe2TtDkS8CRMz2u
RoeFujRzLwGeAzwW+FXgr1r8V4DdgUcDa/p3aLfA/hfge8ByYC/g3LbtaHq3kf9dYBnwv4EPLfA5SFIX/SVwMPAU4MnAgbQ+Nskq4HXAs4DH0Zsy0u//AgZdF/QCYCVwAHAU8Ed92w6id5v7RwCn9O+U5CHA54DPAI9q7/n5tvnVwNEth0cBtwDvmuG5Tsc19P4ctIOyUJdm7p1VdUNVbaHXsR/b4j8HTqqqn1XVT8ftcyC9zvzPq+onVbW1qsZGbV4B/L9VdU1V3QP8NfAUR9Ul7YBeAryxqm6uqs3A/wRe2rb9HvDeqrq6qu5s2/rtCtwx4JhvqaotVfV94O38ss8G2FRVf19V9wzot58L/LCq/q712XdU1aVt2yuAv6yqjVX1M+Bk4IULMH3mDuBh83xMLSIW6tLM3dD3/Hv0CnCAzW1O4SD7AN9rhfh4jwbekeTWJLcCW4DQG3WXpB3Jo+j1q2P6+9hHce/+t/859Ea1HzLgmBP12YOO0W8f4D8m2PZo4BN9/fY1wDbgkQBj8bbtvwL/0hdbO8l7jvcQ4LYZtNeIsVCXZm6fvuf7Apva88mWULoB2HeC0ZYbgFdU1a59j12q6v/MU76StFhsolcEj+nvY28E9u7b1t8XA3yT3nTE8Sbqs2Hqfvuxk2w7fFy/vbSqfgDQHwe+Ajy3LzaTJRefQO+CU+2gLNSlmTshyd7twqE3AB+exj5fo/chc2qSB7ULUJ/Rtv0jcGKSJwIkeViSYxYkc0nqtg8Bf5VkWZI9gP8B/HPbdh7wsiRPSPLAtq3fhdx33jrAnyfZLck+wGuYXp8NveuKfiXJa9tFrg9JclDb9o/AKWNTFFu+R037LJsk92/LMAbYuX029Ndmv01v5RftoCzUpZn7IPBZehcgXQe8eaodqmob8Dx6FyN9H9gIvKht+wTwFuDcJLcDV9FbvUCSdjRvBtbTGx2/kt6yhW8GqKqLgNOAL9JbX/yrbZ+fte1fB27rK6bHnA9cDlwBfBo4czqJVNUdwLPp9d0/BK4FfqdtfgdwAfDZJHcAl9C7MHWmPgv8FPgN4Iz2/LfgF+uoHwGcPYvjakR4wyNpBubrphySpLlJ8gR6AxsPGLv+J8lhwH+rqqPb6wJWVNWG4WU6O0n+BNinqv5i2LloeLwpiyRJWhSSvIDeqPiD6H0T+an+i/Sr6rP0RqkXvar6+2HnoOFz6oskSVosXgFsprcayzbglcNNR1pYTn2RJEmSOsgRdUmSJKmDLNQlSZKkDrJQlyRJkjrIQl2SJEnqIAt1SZIkqYMs1CVJkqQO+v8BWwzvPiPbw8QAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x23a135585f8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.subplot(1, 2, 1)\n",
    "(train['price']).plot.hist(bins=50, figsize=(12, 6), edgecolor = 'white', range = [0, 250])\n",
    "plt.xlabel('price', fontsize=12)\n",
    "plt.title('Price Distribution', fontsize=12)\n",
    "\n",
    "plt.subplot(1, 2, 2)\n",
    "np.log(train['price']+1).plot.hist(bins=50, figsize=(12,6), edgecolor='white')\n",
    "plt.xlabel('log(price+1)', fontsize=12)\n",
    "plt.title('Price Distribution', fontsize=12)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The price distribution is right-skewed: the vast majority of items are priced at 10-20, yet the most expensive item is priced at 2009. We therefore apply a log transformation to the price."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Shipping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0.552411\n",
       "1    0.447589\n",
       "Name: shipping, dtype: float64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['shipping'].value_counts() / len(train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For over 55% of items, the shipping fee was paid by the buyer. How is shipping related to price?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABDsAAAH4CAYAAABE9+afAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xuc1VW9//HXR1BBUVNRT0WKmIBycZCLio6OefB+KW+ppOIlMW+JlVYePUmQniOZmnpMS9GfVqh5A7VS8AJKEjctFFRoDNISJG7iBWT9/vjuGfcMM7CZG/j19Xw85jGzv9+11vez9555PNhv1lrfSCkhSZIkSZKUFxut7wIkSZIkSZKakmGHJEmSJEnKFcMOSZIkSZKUK4YdkiRJkiQpVww7JEmSJElSrhh2SJIkSZKkXDHskCTlTkT8KCJS0ddbEfG7iNilhL6DCn3atUCdzxTVuCIi3omIsRFxfkRs2pi6IqJz4XX4XIntOxbGP7LoWGVEjFi3Z1Xv+OdExFfrON5k1yixjhQRFzTT2BtHxCUR8deIWB4RCyLixYj4flGbikIN3dcy1siImNxMda72XjeXoue7xq/mrkOS9NnTen0XIElSM1kMHFr4uRPwY2BsRHRLKb23hn6PAfsAy5u5vipPAz8k+w+I7YAK4H+AsyLiKymlRQ2sqzPw38BIYNGamwLwdmH8maUWvo7OAf4KPFzr+NeAd5vpmi3tJmAgMBx4EfgcsDdwFHDNOo71Y6Btk1b3ieZ+r4tNLVyryr7ACODYQh2SJDULww5JUl6tTCn9qfDznyLi78B44HDg/tqNI6IV0CqlNB+Y33JlsrCoToBHIuJO4AXgZ8AZAM1ZV0S0SSl9APxprY2bWEppWktfszlExGZk79XlKaVri049GBGxruOllGY3WXGrj/0hLfRep5SWFF8rItoXfpyWUqpsiRokSZ9NLmORJH1WTCl87wifLBOIiK9GxAzgA2CvupaLRETbiPjfiHgzIj6MiL9FxNXFg0fE2RExo3D+zYi4tKGFppRepjBLICK2LIxfV10/iIg3IuKDiPhXRPw+Iv4jIiqA0YVmfyv0q6w1Tr/CMpr3ge+taWlDRFwREf+MiGURcW9EbFV0rs7lNcXLUyLiGaA3cHrR0oVBtdsV9T0xIv5SeC3nRsTwiGhddL7qmj0i4smIeC8iZkbEsSW+xJtExA0RsTAiFkXEzyNik8LY2xRez9Nr1RSF9/26esbcHNgY+GftEymlupZptI+I+wuv6ZyIOK/W9WosYyl6zn0jYnxEvB8Rr0XE12r1eyYiHohs2VBlod1jEfHFojb1LlmKiCERMS8i/h0Rv41ay6AiomdEvFB4jWZExOGFv6OR9bwuJYmI4yJiZXGdheNdC7UeXHj8p4i4JyIuiIi/R7Zc6JGI+I9a/TaLiOsi4h+F36OpETGgMTVKkj5dDDskSZ8VHQvf/1nr2P8CV5PN+Phb7U4REcAjwLeAmwvt/htoX9Tme8D/kS3ROLLw84+jcXtDPEn24XnPuk5GxGlky1+uAw4p1PcG2YfuqcB3C02PJVtG8LVaQ/wGGFN4PmPWUMfJwH8C3wQuAY4AfrmOz+U8siUTjxdq2YdsWc5qCh9qRxWewzHAzwvP5aY6mv8aeJTsub0O/DYiOpRQz3eADmRLToaRLbEZDpBSWgg8RGFGTZEKst+XO+sasDDzZi7wo4g4NiK2WEsNtwMvFWp/Brg5IvqVUPsost/HY4G/APdHxB612uwDXEj2fp0F9GT15UN1ORE4iOz1uIzsd/knVScjm73yB7LlNSeTvXY/A3YsYey1eZRsOdOptY6fAcwDnio6diBwJvBtYDDQj6LZWkV/s6cAV5EtI/or8FhE7NYEtUqSPgVcxiJJyq2i2QCdgFuApdT80LQt8J8ppelFfWoPczAwADgmpfRo0fG7C+23JAs/hqWUriqc
e7LwwfC/IuL/UkofN6D8eYXvO9Rzvh/wx5TSLUXHHix6HrMKP9a3XODGlNINRe071nOdtsARKaVlhXbvAf8vInZLKb26ticBkFJ6pdBvfq0lO3UZCjyTUqqaWfH7wntydUQMSynNK2r7s5TSHYW6pgD/IvuAfutarrEUOCGltAp4IrLNYC+PiKsLYcevgD9GRKeU0pxCnzOAKSmlv6xh3EHAb4HfAasiYlrh8Y0ppY9qtf1NSmlYofZnyD6QHwtMWkvtv0wpVc2Y+QPwCvAD4KSiNtsD/VNKbxbavQlMiIhDU0q/X8PYK4CvppRWFvrtXhi3atbJGWR/M31SSv8otJlNtj9Jo6SUVkTE/yN7Da8pjN2KLPy4o/BeVWlfqOHtQru3gKcioiKl9AxZgPefwN4ppara/hgRXcgCwtqBiiQph5zZIUnKq23JPrytAGaRBR5fr/qAVPCP4qCjHl8h21fj0XrO70M2m+L+iGhd9QWMIwsqSplpUJe17fMwHTg8Iq6KbElKq3Ucv86ZFXV4siroKHiwUFvfdbzeWhWew56svqfKKLJ/s+xT6/gfq35IKb0LvENpr/cjtT48P0gW6lTdIWUs8CZweqGuLciCiDpndRTVMA7YhWzWwx1kv4PXAuMiova/uYprX0E2M6WU2h8q6reKbAZD7RkhU6uCjkK758lem7XNHHm6KugoeAXYvmqJD9l7PqUq6CiMPYksZGoKvwK6RETV+3wI8HmyTXaL/an47zilNBZYwifP7z+BSmBKrb/JsUCfJqpVkrSBM+yQJOXVYrIPZ33IPkR2TCk9UatNKR/StmXNd42oWs4yg0/ClRVkd1kB+FKpBddStXdBfTXeQfa/1CeS/c/6vyLix+sQepT6AfWd4gcppfeBZWQfQptae7KlO7Vrq3q8Ta3jte8y8xHQpoTrvFPP489D9R4bd5LtMRJkr3FrsmUza5RSWppS+m1K6Zt8chegfclmbjRX7bXfi9pt6mtXW101BVAVdvwHdW+S2yQb5xZmCk3kkyVEZwDPpZTeqNV0bc+vPdmSoxW1vn5Aw/8eJUmfMi5jkSTl1cqU0uS1tKlr48ja3mXNHxIXFr4fSd0Bwqw6jpXiYLIPaFPqOln4X/2fAT+LiC/xyS1P/8Hal3FAac8dsiUR1SKiLdCOTwKgDwrfN6GmrUscv9gCsue8fa3jVUt5FtI0ao9f9bg41LqTbHnSgWRLKx5OKf17XS6SUkoRcS1wBdCVbBZGY21PzVv1bs/qYVzt51dfu3X1T6BLHce3a+S4xX5J9jtdtdfGuXW0WdvzW0i2/86JdbRbVccxSVIOObNDkqQ1GwtsE3XcpaRgIvA+8IWU0uQ6vpau6wUjoidwPnBPKf1TSnNTSteQbVC6e+Fw1R4RpcwWWJMBUfNOK8eSBSVVQVLVHhrVGz9GxF7AlrXGWevMhcLeJlOAE2qdOpHsQ+rEdaq8fsfUWlZyLNl7+NeiWuaSLTW5CtiPtSxhiYiNa9+5pGDXwvemWupRvdFs4Tkcw+r7fOwZETsWtduXLAxY234ga/NnoE+tO7v0o/59ZRriPrJ/n/6WLPh6oI42e0dEdQAZEQeR/b5VPb+xZLO5/l3H3+PUJqxVkrQBc2aHJElr9iTZHSh+HRFDye4S8nlg/5TS4JTSooj4EXBDROwEPEf2Ya0zcGBKqfZdUGrbJiL2LvTZlmwmwTeB18juplGniPgF2f9g/4lsyc6BZB+sLys0qZpRMjgifgssX8vmmvV5n+wuFteSPe9rgYdSSq8Uzk8im01yY0RcQbbU5FKyPRSKzQQOiYhDyGYm/K2wz0Zt/w38ISLuJPvA24NsKcjttTYnbYwtyPZYuR3oBlwJ3FTYnLTYr8j2D5lH9nuwJlsBr0XEXWRLmBaTzYL4Adnr89Aa+q6LsyPiI7Jg5pvAl8n2CCn2DjCm8HvZBvgfsn081rQ5aSnuBP6rMPZVZPucXEW2jKVJZkyklJZFxCiyu8jc
WWu/mCoLyH4nh5LNMroWeKGwOSlkdxd6DhgbEf8DvAp8jsKdjVJKVzZFrZKkDZthhyRJa1BYivA1sg/cF5NN2X+Lov0bUkr/W7gjxBCy25p+QBZWjCrhEgeSzVhYSbZnwl+A75PddePDNfSbSPZhdzDZB9o3gG+mlB4u1PRmRHwXuIjsNqTz+OT2u+vit2R3L/kV2QfLR8luc0vhOh8VXp9byP4Xflbh/L21xhlGdovS+8j+F/4MVt94kpTSHyPiJLIP1QPJPrj/lCwEaSo/JdtP4zdkIdMvyfY/qW0M2ftyV60NTeuyhOw2xoeT3fJ0S7KQ4w9kd+pZ3DSlcxLZ8qVhZO/p11NK02q1mUh216HryX5fnyG7nWyjpJSWR8ShZLdWHkW2CeilZM+7drjVGA+ThR131HP+abKQ7SaygPApsr+DqjpXFWZiXQl8j2yWx7vANOCG1UaTJOVSZHtwSZIkqVhEHE4WeHSuY5PMlq5lENnMii3qme1Q1e4ZYEFK6fgWqmtnsmDvnJTSGpf6rMOYNwKHppQ613HuT8AbKaVvNMW1JEn55cwOSZKkIhHxBbIlQdcAj6/voGNDEhE/IJvZ9CbZTJ0fkC1j+V0TjN2V7Pa/Z5PNGJEkqcEMOyRJkmo6h2wZzVSyJUD6RCJbUvQF4ENgPPDdlFJTLGMZCexBFpyUckchSZLq5TIWSZIkSZKUK956VpIkSZIk5YphhyRJkiRJyhX37KhD+/btU8eOHdd3GZIkSZIkqciUKVMWpJS2W1s7w446dOzYkcmTJ6/vMiRJkiRJUpGIeLOUdi5jkSRJkiRJuWLYIUmSJEmScsWwQ5IkSZIk5Yp7dkiSJEnSBmLFihXMmzePDz74YH2XIq1Xbdq0oUOHDmy88cYN6m/YIUmSJEkbiHnz5rHFFlvQsWNHImJ9lyOtFykl3n33XebNm8fOO+/coDFcxiJJkiRJG4gPPviAbbfd1qBDn2kRwbbbbtuoGU6GHZIkSZK0ATHokBr/d2DYIUmSJEmq1qpVK8rKyujevTsnnHACy5cvr7Pd4YcfzqJFi5q1lmeeeYYjjzyyycddtWoVF110Ed27d6dHjx707duXv/3tb2vsU1FRweTJkwHo2LEjCxYsaPK6msPw4cPp1q0bPXv2pKysjBdffBGo/zk8+uijXHPNNQ2+Xv/+/Rvctym5Z4ckSZIkbaB++tjfm3S87xyx41rbtG3blunTpwMwcOBAbr31Vi655JLq8yklUko8/vjjTVpbSxo1ahRvvfUWL7/8MhtttBHz5s1j8803b7brffzxx7Rq1arZxq/PxIkTGTNmDFOnTmXTTTdlwYIFfPTRR2vsc/TRR3P00Uc3+JovvPBCg/s2JWd2SJIkSZLqVF5ezhtvvEFlZSW77bYb5513HnvuuSdz586tMTPg7rvvpmfPnuyxxx6ceuqpAMyfP5/jjjuOvn370rdvX55//vnVxj/88MN5+eWXAejVqxdDhw4F4IorruCXv/wlAMuWLeP444+na9euDBw4kJQSAFOmTOGAAw6gd+/eHHLIIbz99ttANgPjsssuo1+/fnTu3Jnx48evdt23336bz3/+82y0UfaRuEOHDmy99dYA/PGPf2SfffZhzz335IQTTmDZsmVrfI3uuece+vXrR1lZGYMHD+bjjz8GoF27dlx55ZXstddeTJw4sUafiooKLr74Yvr370/37t2ZNGkSAJMmTaJ///706tWL/v37M2vWrOr3oSqAAth33315+eWXefbZZykrK6OsrIxevXqxdOnS1Z5n+/bt2XTTTQFo3749X/jCF6rP//znP2fPPfekR48ezJw5E4CRI0dywQUXADBo0CDOPfdcysvL6dy5M2PGjKluc8wxx3DooYfSpUsXrrrqquox27VrB2SzcioqKup87x5//HG6du3Kfvvtx0UXXdQss3cMOyRJkiRJq1m5ciVPPPEEPXr0AGDWrFmcdtppTJs2jZ122qm63YwZMxg+
fDjjxo3jpZde4oYbbgDg29/+NkOGDOHPf/4zv/vd7zj77LNXu8b+++/P+PHjWbJkCa1bt64ORCZMmEB5eTkA06ZN4/rrr+eVV15hzpw5PP/886xYsYILL7yQBx54gClTpnDmmWdy+eWX16h90qRJXH/99TU+iFc58cQTGT16NGVlZXznO99h2rRpACxYsIBhw4bx1FNPMXXqVPr06cN1111X72v06quvMmrUKJ5//nmmT59Oq1atuPfeewF477336N69Oy+++CL77bffan3fe+89XnjhBW655RbOPPNMALp27cpzzz3HtGnTGDp0KD/84Q8BOPvssxk5ciQAr732Gh9++CE9e/ZkxIgR3HzzzUyfPp3x48fTtm3bGtc4+OCDmTt3Lp07d+a8887j2WefrXG+ffv2TJ06lW9961uMGDGizudYWVnJs88+y2OPPca5555bvWnopEmTuPfee5k+fTr3339/9RKfYnW9dx988AGDBw/miSeeYMKECcyfP7/e17cxXMYiSZIkSar2/vvvU1ZWBmQzCs466yzeeustdtppJ/bee+/V2o8bN47jjz+e9u3bA7DNNtsA8NRTT/HKK69Ut1uyZAlLly5liy22qD5WXl7OjTfeyM4778wRRxzBk08+yfLly6msrKRLly68/fbb9OvXjw4dOgBQVlZGZWUln/vc5/jrX//KgAEDgGyZyOc///nqcY899lgAevfuTWVl5Wo1d+jQgVmzZjFu3DjGjRvHQQcdxP3338/777/PK6+8wr777gvARx99xD777FPvazV27FimTJlC3759q1+77bffHsj2PjnuuOPq7XvyyScDWeCzZMkSFi1axNKlSzn99NN5/fXXiQhWrFgBwAknnMCPf/xjrr32Wu644w4GDRoEZDM8LrnkEgYOHMixxx5b/TpVadeuHVOmTGH8+PE8/fTTfP3rX+eaa66p7l/8Oj344IN11nniiSey0UYbseuuu9KpU6fqGSADBgxg2223rR5nwoQJ9OnTp0bfut67du3a0alTp+pbyp588sncdttt9b5ODWXYIUmSJEmqVrxnR7H69rRIKdV554xVq1YxceLE1WYbFOvbty+TJ0+mU6dODBgwgAULFnD77bfTu3fv6jZVSzAgCxBWrlxJSolu3bqttjykdp+q9vW1OeywwzjssMPYYYcdePjhhzn44IMZMGAAv/nNb+qtufZzP/3007n66qtXO9emTZs17tNR+zWLCK644goOPPBAHnroISorK6moqABgs802Y8CAATzyyCPcd9991bMovv/973PEEUfw+OOPs/fee/PUU0/RtWvXGuO2atWKiooKKioq6NGjB3fddVd12FHK61RXnWs6Xqy+964luIxFkiRJktRgBx10EPfddx/vvvsuAAsXLgSyJRQ33XRTdbu6ApRNNtmEL33pS9x3333svffelJeXM2LEiOolLPXp0qUL8+fPrw47VqxYwYwZM0queerUqbz11ltAFsq8/PLL1TNXnn/+ed544w0Ali9fzmuvvbbG5/7AAw/wzjvvVD/3N998s6QaRo0aBWRLdrbaaiu22morFi9ezBe/+EWA6mUrVc4++2wuuugi+vbtWz17Zvbs2fTo0YPLLruMPn36VM+6qDJr1ixef/316sfTp0+vsQSpFPfffz+rVq1i9uzZzJkzhy5dugDw5JNPsnDhQt5//30efvjh6tkwa9O1a1fmzJlTPeOm6nVoas7skCRJkiQ1WLdu3bj88ss54IADaNWqFb169WLkyJHceOONnH/++fTs2ZOVK1ey//77c+utt67Wv7y8nLFjx7LZZptRXl7OvHnz1hp2bLLJJjzwwANcdNFFLF68mJUrV3LxxRfTrVu3kmp+5513+OY3v8mHH34IZMstLrjgAtq0acPIkSM5+eSTq88NGzaMzp071znO7rvvzrBhwzj44INZtWoVG2+8MTfffHNJgcLWW29N//79WbJkCXfccQcAl156KaeffjrX
XXcdX/nKV2q07927N1tuuSVnnHFG9bHrr7+ep59+mlatWrH77rtz2GGH1eizbNkyLrzwQhYtWkTr1q358pe/vM5LRrp06cIBBxzAv/71L2699VbatGkDwH777cepp57KG2+8wSmnnLLaEpb6tG3blltuuYVDDz2U9u3b069fv3Wqp1TRUlNIPk369OmT6tpcRZIkSZKa06uvvspuu+22vstQM6uoqGDEiBElBwQAb731FhUVFcycObP6LjLNbdCgQRx55JEcf/zxNY6PHDmSyZMn15i5sy6WLVtGu3btSClx/vnns+uuuzJkyJDV2tX19xARU1JKa33hXMYiSZIkSdIG7O6772avvfZi+PDhLRZ0NKfbb7+dsrIyunXrxuLFixk8eHCTX8OZHXVwZockSZKk9cGZHdInnNkhSZIkSZJU4AalUg4cdc9RTTbW6G+MbrKxJEmSJGl9cGaHJEmSJEnKFcMOSZIkSZKUK4YdkiRJkqRqrVq1oqysjO7du3PCCSewfPnyOtsdfvjhLFq0qFlreeaZZzjyyCObfNzKykratm1LWVkZe+yxB/3792fWrFlNfp0NxfDhw+nWrRs9e/akrKyMF198EYCOHTuyYMGC1do/+uijXHPNNQ2+Xv/+/Rvct6m4Z4ckSZIkbaCacm82KG1/trZt2zJ9+nQABg4cyK233soll1xSfT6lREqJxx9/vElra2m77LJL9fP8xS9+wU9+8hPuuuuuZrvexx9/TKtWrZpt/PpMnDiRMWPGMHXqVDbddFMWLFjARx99tMY+Rx99NEcffXSDr/nCCy80uG9TcWaHJEmSJKlO5eXlvPHGG1RWVrLbbrtx3nnnseeeezJ37twaswLuvvtuevbsyR577MGpp54KwPz58znuuOPo27cvffv25fnnn19t/MMPP5yXX34ZgF69ejF06FAArrjiCn75y18CsGzZMo4//ni6du3KwIEDSSkBMGXKFA444AB69+7NIYccwttvvw1ARUUFl112Gf369aNz586MHz9+rc9zyZIlbL311gCMHDmSCy64oPrckUceyTPPPMOvfvUrhgwZUn389ttvrw6B7rnnHvr160dZWRmDBw/m448/BqBdu3ZceeWV7LXXXkycOLHGNSsqKrj44ovp378/3bt3Z9KkSQBMmjSJ/v3706tXrxozTsrLy6vDGYB9992Xl19+mWeffZaysjLKysro1asXS5curXGdt99+m/bt27PpppsC0L59e77whS9Un//5z3/OnnvuSY8ePZg5c+Zqr8GgQYM499xzKS8vp3PnzowZM6a6zTHHHMOhhx5Kly5duOqqq6rHbNeuHZDNzKmoqKjz/Xv88cfp2rUr++23HxdddFGTz+Bp0bAjIk6KiFcj4r2ImB0R5YXjB0XEzIhYHhFPR8RORX02jYg7ImJJRPwzIi6pNWaD+0qSJEmS6rZy5UqeeOIJevToAcCsWbM47bTTmDZtGjvtVP2xixkzZjB8+HDGjRvHSy+9xA033ADAt7/9bYYMGcKf//xnfve733H22Wevdo3999+f8ePHs2TJElq3bl0diEyYMIHy8nIApk2bxvXXX88rr7zCnDlzeP7551mxYgUXXnghDzzwAFOmTOHMM8/k8ssvr1H7pEmTuP7662t8CC82e/ZsysrK2GWXXbjuuutqzF6py0knncSjjz7KihUrALjzzjs544wzePXVVxk1ahTPP/8806dPp1WrVtx7770AvPfee3Tv3p0XX3yR/fbbb7Ux33vvPV544QVuueUWzjzzTAC6du3Kc889x7Rp0xg6dCg//OEPATj77LMZOXIkAK+99hoffvghPXv2ZMSIEdx8881Mnz6d8ePH07Zt2xrXOPjgg5k7dy6dO3fmvPPO49lnn61xvn379kydOpVvfetbjBgxos7nXllZybPPPstjjz3GueeeywcffABkwcy9997L9OnTuf/++5k8efJqfet6/z744AMGDx7ME088wYQJE5g/
f/4aX/uGaLGwIyIGAP8DnAFsAewPzImI9sCDwBXANsBkYFRR1x8BuwI7AQcCl0bEoYUxG9xXkiRJkrS6999/n7KyMvr06cOOO+7IWWedBcBOO+3E3nvvvVr7cePGcfzxx9O+fXsAttlmGwCeeuopLrjgAsrKyjj66KNZsmTJarMOysvLee6555gwYQJHHHEEy5YtY/ny5VRWVtKlSxcA+vXrR4cOHdhoo40oKyujsrKSWbNm8de//pUBAwZQVlbGsGHDmDdvXvW4xx57LAC9e/emsrKyzudZtYxl9uzZXH/99ZxzzjlrfF0233xzvvKVrzBmzBhmzpzJihUr6NGjB2PHjmXKlCn07duXsrIyxo4dy5w5c4Bs/5Pjjjuu3jFPPvlkIAt9lixZwqJFi1i8eDEnnHAC3bt3Z8iQIcyYMQOAE044gTFjxrBixQruuOMOBg0aBGQzPC655BJuvPFGFi1aROvWNXeraNeuHVOmTOG2225ju+224+tf/3p1aFLqa3XiiSey0UYbseuuu9KpU6fqGSADBgxg2223pW3bthx77LFMmDBhtb51vX8zZ86kU6dO7LzzzjVeh6bUknt2XAUMTSn9qfD4HwARcQ4wI6V0f+Hxj4AFEdE1pTQTOA04I6X0b+DfEXE7MAj4PXBsI/pKkiRJkmop3rOj2Oabb15n+5QSEbHa8VWrVjFx4sTVZhoU69u3L5MnT6ZTp04MGDCABQsWcPvtt9O7d+/qNlXLLyALD1auXElKiW7duq22NKR2n6r2a3P00UdzxhlnANC6dWtWrVpVfa5qFgNksyt+8pOf0LVr1+r2KSVOP/10rr766tXGbdOmzRr36aj9ukUEV1xxBQceeCAPPfQQlZWVVFRUALDZZpsxYMAAHnnkEe67777qWRTf//73OeKII3j88cfZe++9eeqpp+jatWuNcVu1akVFRQUVFRX06NGDu+66qzosKeW1qqvONR0vVt/719xaJOyIiFZAH+DRiHgDaAM8DHwP6Aa8VNU2pfReRMwGukXEv4AvFJ8v/PzVws+N6Ss12E8f+3uTjPOdI3ZsknEkSZKk9eWggw7ia1/7GkOGDGHbbbdl4cKFbLPNNhx88MHcdNNNfO973wNg+vTplJWV1egCD0TtAAAgAElEQVS7ySab8KUvfYn77ruPK664gvnz5/Pd736X7373u2u8ZpcuXZg/fz4TJ05kn332YcWKFbz22mt069atQc9hwoQJ7LLLLkB2h5JbbrmFVatW8Y9//KN6Lw2Avfbai7lz5zJ16tTqvUYOOuggjjnmGIYMGcL222/PwoULWbp0aY2lPvUZNWoUBx54IBMmTGCrrbZiq622YvHixXzxi18EqDEDA7Kw5aijjqK8vLx6Bs3s2bPp0aMHPXr0YOLEicycObNG2DFr1qzqWRmQvQ+l1Fbs/vvv5/TTT+dvf/sbc+bMoUuXLkybNo0nn3yShQsX0rZtWx5++GHuuOOOksbr2rUrc+bMobKyko4dOzJq1Ki1d1pHLTWzYwdgY+B4oBxYATwC/BfQDqi9QGcx2VKXdkWPa5+jkX1rKMwwOQdgxx39ACpJkiRJpejWrRuXX345BxxwAK1ataJXr16MHDmSG2+8kfPPP5+ePXuycuVK9t9/f2699dbV+peXlzN27Fg222wzysvLmTdvXvV+HfXZZJNNeOCBB7joootYvHgxK1eu5OKLL16nsKNqz46UEptsskn1hqj77rsvO++8Mz169KB79+7sueeeNfqdeOKJTJ8+vXpD0913351hw4Zx8MEHs2rVKjbeeGNuvvnmkgKFrbfemv79+7NkyZLqoODSSy/l9NNP57rrruMrX/lKjfa9e/dmyy23rJ5VAnD99dfz9NNP06pVK3bffXcOO+ywGn2WLVvGhRdeWL3E5ctf/jK33XZbya8TZOHSAQccwL/+9S9uvfVW2rRpA8B+++3HqaeeyhtvvMEpp5xCnz59Shqvbdu23HLLLRx66KG0
b9+efv36rVM9pYiWmD4SEVsDC4FBKaW7CseOIws7ngM2TimdV9T+L2T7bYwr9NshpfROUb8fpZR6RMQNDe27pnr79OmT6tpYRaqyoc3saMpbkpVyOzJJkiQ1j1dffZXddtttfZehNTjyyCMZMmQIBx10UKPGqaioYMSIESUHBABvvfUWFRUVzJw5k402apktOAcNGsSRRx7J8ccfX+P4yJEjmTx5MjfddFODxl22bBnt2rUjpcT555/PrrvuWuNuN1D330NETEkprfVFa5FXp7BnxjygrmRlBrBH1YOI2BzYhWwvjn8DbxefL/w8own6SpIkSZJUkkWLFtG5c2fatm3b6KCjIe6++2722msvhg8f3mJBR3O6/fbbKSsro1u3bixevJjBgwc36fgtMrMDICKGAocBR5AtY3kUeAa4EXgDOBN4jGwj0wNSSnsX+l0D7EO218YOwNNkm47+PiK2a2jfNdXqzA6tjTM7JEmS1Byc2SF9YoOf2VHwY+DPwGvAq8A0YHhKaT5wHDAc+DewF3BSUb//BmYDbwLPAtdWhRWN6StJkiRJkvKpxW49m1JaAZxX+Kp97img62qdsnMfks3cOLOe8w3uK0mSJEkbmvpu5Sp9ljR2Fcqnf6GPJEmSJOVEmzZtePfddxv9QU/6NEsp8e6771bf9aUhWmxmhyRJkiRpzTp06MC8efOYP3/++i5FWq/atGlDhw4dGtzfsEOSJEmSNhAbb7wxO++88/ouQ/rUcxmLJEmSJEnKFcMOSZIkSZKUK4YdkiRJkiQpVww7JEmSJElSrhh2SJIkSZKkXDHskCRJkiRJuWLYIUmSJEmScsWwQ5IkSZIk5YphhyRJkiRJyhXDDkmSJEmSlCuGHZIkSZIkKVcMOyRJkiRJUq4YdkiSJEmSpFwx7JAkSZIkSbli2CFJkiRJknLFsEOSJEmSJOWKYYckSZIkScoVww5JkiRJkpQrhh2SJEmSJClXDDskSZIkSVKuGHZIkiRJkqRcMeyQJEmSJEm5YtghSZIkSZJyxbBDkiRJkiTlimGHJEmSJEnKFcMOSZIkSZKUK4YdkiRJkiQpVww7JEmSJElSrhh2SJIkSZKkXDHskCRJkiRJuWLYIUmSJEmScsWwQ5IkSZIk5YphhyRJkiRJyhXDDkmSJEmSlCuGHZIkSZIkKVcMOyRJkiRJUq4YdkiSJEmSpFwx7JAkSZIkSbli2CFJkiRJknLFsEOSJEmSJOWKYYckSZIkScoVww5JkiRJkpQrhh2SJEmSJClXDDskSZIkSVKuGHZIkiRJkqRcMeyQJEmSJEm5YtghSZIkSZJyxbBDkiRJkiTlimGHJEmSJEnKFcMOSZIkSZKUK4YdkiRJkiQpV1qv7wKkT6NfzTqzScb5zhFPNck4kiRJkqRPOLNDkiRJkiTlimGHJEmSJEnKFZex6DPlp4/9fX2XIEmSJElqZi02syMinomIDyJiWeFrVtG5UyLizYh4LyIejohtis5tExEPFc69GRGn1Bq3wX0lSZIkSVL+tPQylgtSSu0KX10AIqIb8AvgVGAHYDlwS1Gfm4GPCucGAv9X6NOovpIkSZIkKZ82hGUsA4HRKaXnACLiCuDViNgCWAUcB3RPKS0DJkTEo2Thxvcb2VeSJEmSJOVQS8/suDoiFkTE8xFRUTjWDXipqkFKaTbZbIzOha+PU0qvFY3xUqFPY/vWEBHnRMTkiJg8f/78RjxFSZIkSZK0PrXkzI7LgFfIwoiTgNERUQa0AxbXarsY2AL4eA3naGTfGlJKtwG3AfTp0yeV9IykHDrqnqOaZJzR3xjdJONIkiRJ0rpqsbAjpfRi0cO7IuJk4HBgGbBlreZbAkvJlqLUd45G9pUkSZIkSTm0PvfsSEAAM4A9qg5GRCdgU+A1ssCidUTsmlJ6vdBkj0IfGtlXn0G/mnXm+i5BkiRJktTMWiTsiIjPAXsBzwIrga8D+wMXF2qYGBHlwFRgKPBgSmlpoe+DwNCI
OBsoA44B+heGvrcRfaX1rqmWjEiSJEmSPtFSG5RuDAwD5gMLgAuBr6aUZqWUZgDnkgUX75DtqXFeUd/zgLaFc78BvlXoQ2P6SpIkSZKkfGqRmR0ppflA3zWc/zXw63rOLQS+2hx9JUmSJElS/rT0rWclSZIkSZKalWGHJEmSJEnKFcMOSZIkSZKUK4YdkiRJkiQpVww7JEmSJElSrhh2SJIkSZKkXDHskCRJkiRJuWLYIUmSJEmScsWwQ5IkSZIk5YphhyRJkiRJyhXDDkmSJEmSlCuGHZIkSZIkKVdar+8CpLX56WN/X98lSJIkSZI+RZzZIUmSJEmScsWwQ5IkSZIk5YphhyRJkiRJyhXDDkmSJEmSlCuGHZIkSZIkKVcMOyRJkiRJUq4YdkiSJEmSpFxpvb4LkNR4s995v8nG2mX7tk02liRJkiStD87skCRJkiRJuWLYIUmSJEmScsWwQ5IkSZIk5YphhyRJkiRJyhXDDkmSJEmSlCuGHZIkSZIkKVcMOyRJkiRJUq4YdkiSJEmSpFwx7JAkSZIkSbli2CFJkiRJknLFsEOSJEmSJOWKYYckSZIkScoVww5JkiRJkpQrrdd3AZLy6ah7jmqScUZ/Y3STjCNJkiTps8OZHZIkSZIkKVcMOyRJkiRJUq4YdkiSJEmSpFwx7JAkSZIkSbniBqXSejT7nffXdwmSJEmSlDvO7JAkSZIkSbli2CFJkiRJknLFsEOSJEmSJOWKYYckSZIkScoVww5JkiRJkpQrhh2SJEmSJClXDDskSZIkSVKuGHZIkiRJkqRcMeyQJEmSJEm5YtghSZIkSZJyxbBDkiRJkiTlimGHJEmSJEnKFcMOSZIkSZKUK4YdkiRJkiQpVww7JEmSJElSrhh2SJIkSZKkXDHskCRJkiRJudLiYUdE7BoRH0TEPUXHTomINyPivYh4OCK2KTq3TUQ8VDj3ZkScUmu8BveVJEmSJEn5sz5mdtwM/LnqQUR0A34BnArsACwHbqnV/qPCuYHA/xX6NKqvJEmSJEnKp9YtebGIOAlYBLwAfLlweCAwOqX0XKHNFcCrEbEFsAo4DuieUloGTIiIR8nCje83sq8kSZIkScqhFpvZERFbAkOB79Q61Q14qepBSmk22WyMzoWvj1NKrxW1f6nQp7F9JUmSJElSDrXkMpYfA79KKc2tdbwdsLjWscXAFms519i+NUTEORExOSImz58/fy1PRZIkSZIkbahaJOyIiDLgP4Gf1XF6GbBlrWNbAkvXcq6xfWtIKd2WUuqTUuqz3Xbb1f9kJEmSJEnSBq2l9uyoADoCf48IyGZdtIqI3YHfA3tUNYyITsCmwGtk+260johdU0qvF5rsAcwo/DyjEX0lSZIkSVIOtVTYcRvw26LH3yULP74FbA9MjIhyYCrZvh4PppSWAkTEg8DQiDgbKAOOAfoXxrm3EX0lSZIkSVIOtcgylpTS8pTSP6u+yJaYfJBSmp9SmgGcSxZcvEO2p8Z5Rd3PA9oWzv0G+FahD43pK0mSJEmS8qlFbz1bJaX0o1qPfw38up62C4GvrmGsBveVJEmSJEn505J3Y5EkSZIkSWp2hh2SJEmSJClXDDskSZIkSVKuGHZIkiRJkqRcMeyQJEmSJEm5YtghSZIkSZJyZb3celafDT997O/ruwRJkiRJ0meQMzskSZIkSVKuGHZIkiRJkqRcMeyQJEmSJEm5YtghSZIkSZJyxbBDkiRJkiTlimGHJEmSJEnKFcMOSZIkSZKUK4YdkiRJkiQpV1qv7wIkaU2OuueoJhln9DdGN8k4kiRJkjZ8zuyQJEmSJEm5UlLYERGtmrsQSZIkSZKkplDqzI63I+KGiOjTrNVIkiRJkiQ1Uqlhx2HAx8DoiHg1In4YETs2Y12SJEmSJEkNUlLYkVKaklK6BPgiMATYHfhLRDwdEWdGxObNWaQkSZIkSVKp1mmD0pTSKmBm4Ws+WfgxEJgbEac2fXmSJEmSJEnrptQN
SreOiMERMQGYQhZynJZS6pxSOgg4BLixGeuUJEmSJEkqSesS280DniYLNB5JKX1YfDKl9OeIeKSpi5MkSZIkSVpXpYYdnVJK/1pTg5TSoMaXI0mSJEmS1Dil7tlxRkT0LT4QEf0i4tJmqEmSJEmSJKnBSg07vg28UuvYK8DFTVuOJEmSJElS45QadmwCrKh17COgTdOWI0mSJEmS1Dilhh1TgPNqHTsXmNq05UiSJEmSJDVOqRuUDgGejIhTgdnAl4EdgAHNVZgkSZIkSVJDlBR2pJRmRERn4CigA/AgMCaltKw5i5MkSZIkSVpXpc7soBBs/KYZa5EkSZIkSWq0ksKOiNgZGA6UAe2Kz6WUdmyGuiRJkiRJkhqk1Jkdvybbq+M7wPLmK0eSJEmSJKlxSg07ugH7ppRWNWcxkiRJkiRJjVXqrWefA3o1ZyGSJEmSJElNodSZHZXAHyLiQeCfxSdSSlc2dVGSJEmSJEkNVWrYsTkwGtgY+FLzlSNJkiRJktQ4JYUdKaUzmrsQSZIkSZKkplDqzA4iYjfgeGCHlNIFEdEF2DSl9HKzVSdJG6Cj7jmqScYZ/Y3RTTKOJEmSpJpK2qA0Ik4g26T0i8BphcNbANc1U12SJEmSJEkNUurdWIYCA1JK5wIfF469BOzRLFVJkiRJkiQ1UKlhx/Zk4QZAKvqe6m4uSZIkSZK0fpQadkwBTq117CRgUtOWI0mSJEmS1DilblB6EfDHiDgL2Dwi/gB0Bg5utsokSZIkSZIaoNRbz86MiK7AkcAYYC4wJqW0rDmLkyRJkiRJWlcl33o2pbQcuK8Za5EkSZIkSWq0ksKOiBhPPZuRppT2b9KKJEmSJEmSGqHUmR2/rPX4P4CzgHuathxJkiRJkqTGKXXPjrtqH4uI3wF3AkObuihJqjL7nffXdwmSJEmSPmVKvfVsXf4B9GyqQiRJkiRJkppCqXt2nFnr0GbAscCfmrwiSZIkSZKkRih1z45Taz1+D3gB+FnTliNJkiRJktQ4pe7ZcWBzFyJJkiRJktQUSl3G0qmUdimlOY0rR5IkSZIkqXFKXcbyBpAKP0fRz1WPKRxr1UR1SZIkSZIkNUipd2M5C/gt0BVoU/j+a+CslNJGhS+DDkmSJEmStN6VGnb8GDg7pfR6SumjlNLrwGBgWKkXioh7IuLtiFgSEa9FxNlF5w6KiJkRsTwino6InYrObRoRdxT6/TMiLqk1boP7SpIkSZKk/Ck17NgI6Fjr2E6s27KVq4GOKaUtgaOBYRHROyLaAw8CVwDbAJOBUUX9fgTsWrjegcClEXEoQGP6SpIkSZKkfCp1z46fAeMi4k5gLvAlYBDrcOvZlNKM4oeFr12A3sCMlNL9ABHxI2BBRHRNKc0ETgPOSCn9G/h3RNxeuPbvgWMb0VeSJEmSJOVQSTM7UkrXAmcAO5DNyvgP4MyU0v+uy8Ui4paIWA7MBN4GHge6AS8VXes9YDbQLSK2Br5QfL7wc7fCz43pK0mSJEmScqjUmR2klH5PI2dEpJTOi4gLgX2ACuBDoB0wv1bTxcAWhXNVj2ufo5F9a4iIc4BzAHbccceSno8kSZIkSdrwlDSzo7DR5/CImBMRiwvHDo6IC9b1gimlj1NKE4AOwLeAZcCWtZptCSwtnKPW+apzNLJv7bpuSyn1SSn12W677Up/QpIkSZIkaYNS6galPwO6AwPJ9toAmEEWVjRUa7I9O2YAe1QdjIjNq44X9tp4u/h84eeq/T8a01eSJEmSJOVQqWHH14BTUkoTgVUAKaV/AF8spXNEbB8RJ0VEu4hoFRGHACcD44CHgO4RcVxEtAGuBF4ubDAKcDfwXxGxdUR0Bb4JjCyca0xfSZIkSZKUQ6WGHR9Ra3+PiNgOeLfE/olsFsg84N/ACODilNIjKaX5wHHA8MK5vYCTivr+N9mmo28CzwLXFvYPoTF9JUmSJElSPpW6Qen9wF0RMQQgIj4PXA/8tpTOhVDigDWcfwro
Ws+5D4EzC19N2leSJEmSJOVPqTM7fghUAn8BPge8DrwFXNU8ZUmSJEmSJDXMWmd2RMRGwH7AZSmliwvLVxaklNJaukqSJEmSJLW4tc7sSCmtAh4pLAkhpTTfoEOSJEmSJG2oSl3G8lxE7N2slUiSJEmSJDWBUjcofRN4IiIeAeaS3V0FgJTSlc1RmCRJkiRJUkPUO7MjIi4oergV8DBZyNEB+FLRlyRJkiRJ0gZjTTM7hgM3FX4+KqW0ZQvUI0mSJEmS1ChrCjtmR8RPgRnAxhFxBhC1G6WU7miu4iRJkiRJktbVmsKOk4BLgZOBjYHT6miTAMMOSZIkSZK0wag37EgpvQacDRARY1NKB7VYVZL0GXDUPUc1yTijvzG6ScaRJEmS8qKkW88adEiSJEmSpE+LksIOSZIkSZKkTwvDDkmSJEmSlCuGHZIkSZIkKVcMOyRJkiRJUq4YdkiSJEmSpFwx7JAkSZIkSbli2CFJkiRJknLFsEOSJEmSJOWKYYckSZIkScoVww5JkiRJkpQrhh2SJEmSJClXDDskSZIkSVKuGHZIkiRJkqRcab2+C5CkT5vZ77zfJOPssn3bJhlHkiRJUk3O7JAkSZIkSbli2CFJkiRJknLFsEOSJEmSJOWKYYckSZIkScoVww5JkiRJkpQrhh2SJEmSJClXDDskSZIkSVKuGHZIkiRJkqRcMeyQJEmSJEm5YtghSZIkSZJyxbBDkiRJkiTlimGHJEmSJEnKFcMOSZIkSZKUK4YdkiRJkiQpVww7JEmSJElSrhh2SJIkSZKkXDHskCRJkiRJuWLYIUmSJEmScsWwQ5IkSZIk5YphhyRJkiRJyhXDDkmSJEmSlCuGHZIkSZIkKVcMOyRJkiRJUq4YdkiSJEmSpFwx7JAkSZIkSbnSen0XIElqnKPuOapJxhn9jdFNMo4kSZK0vjmzQ5IkSZIk5YphhyRJkiRJyhXDDkmSJEmSlCuGHZIkSZIkKVcMOyRJkiRJUq4YdkiSJEmSpFxpkbAjIjaNiF9FxJsRsTQipkXEYUXnD4qImRGxPCKejoidavW9IyKWRMQ/I+KSWmM3uK8kSZIkScqflprZ0RqYCxwAbAVcAdwXER0joj3wYOHYNsBk+P/t3X/snVddB/D3hxXZ7ChusE0RKLFuGRlhIy6SSFAIKL/yDWTVOKEgLDIE0ShDBFNgAkPJgsbIL0fGBhQQiRtYRqYhIpH4hxbJFsvGQtFtwGYLlLGObvzw+Md9Gr/7snYt36ffe5/T1yu54d7n3HN2nnG+z+593/Ock48sq3txktOTbEzylCSvrqpnJMlq6gIAAAB9WrcW/5DW2l2ZBQ8HfKKq/ivJzyV5aJKdrbWPJklVXZzk61V1ZmvtxiQvTPLi1treJHur6j1JXpTk2iTnraIucB927d4/SjubTj1hlHYAAACO1JqEHStV1WlJzkiyM8nLklx3oKy1dldV7UpyVlX9T5KHLy8fnj93eH7WKuoyEZd/8YJ5dwEAAIAJWfMFSqvqgUk+mOR9w+yLE5PcseJtdyR58FCWFeUHyrLKuiv7dWFV7aiqHXv27Dn8EwIAAAAWypqGHVX1gCQfSPLdJK8YDu9LsmHFWzckuXMoy4ryA2WrrXsvrbXLWmvnttbOPeWUUw7rfAAAAIDFs2ZhR1VVksuTnJZkc2vte0PRziRnL3vf+iSbMluLY2+S25aXD893jlAXAAAA6NBartnxriSPSfK01tryFRCvTnJpVW1Ock2S1ye5frjFJUnen2RrVe3ILCh5SZIXj1AXgGWWti2N1tb2LdtHawsAAI7UmszsqKqNSV6a5Jwkt1fVvuHx/NbaniSbk1ySZG+SJyQ5f1n1NyTZleTmJJ9Jcmlr7dokWU1dAAAAoE9rtfXszUnqEOWfSnLmQcruSXLB8Bi1LnD0jLWFLQAAwJFa891YAAAAAI6mtVyzA2BuxlyPAgAAWGxmdgAAAABdEXYAAAAAXRF2AAAAAF0RdgAA
AABdEXYAAAAAXRF2AAAAAF0RdgAAAABdEXYAAAAAXRF2AAAAAF0RdgAAAABdEXYAAAAAXRF2AAAAAF0RdgAAAABdEXYAAAAAXRF2AAAAAF0RdgAAAABdEXYAAAAAXRF2AAAAAF0RdgAAAABdEXYAAAAAXRF2AAAAAF1ZN+8OAMDBLG1bGqWd7Vu2j9IOAADTYGYHAAAA0BVhBwAAANAVYQcAAADQFWEHAAAA0BULlPJD3nbNLfPuAgAAAPzIzOwAAAAAumJmBwBJkl2798+7CwAAMAphBwDdW9q2NEo727dsH6UdAACOLmEHcEwwawEAAI4d1uwAAAAAuiLsAAAAALoi7AAAAAC6IuwAAAAAuiLsAAAAALoi7AAAAAC6IuwAAAAAuiLsAAAAALoi7AAAAAC6IuwAAAAAuiLsAAAAALoi7AAAAAC6IuwAAAAAuiLsAAAAALqybt4dAICD2bV7/yjtbDr1hFHaAQBgGszsAAAAALoi7AAAAAC6IuwAAAAAuiLsAAAAALoi7AAAAAC6IuwAAAAAuiLsAAAAALqybt4dAIBj0dK2pVHa2b5l+yjtAAD0RNgBAIdprIACAICjy20sAAAAQFfWLOyoqldU1Y6quqeqrlxR9tSqurGqvlNVn66qjcvKHlRV762qb1fV7VX1yrHqAgAAAP1Zy5kdX0vy5iTvXX6wqh6W5Kokr0tycpIdST6y7C0XJzk9ycYkT0ny6qp6xmrrAgAAAH1as7CjtXZVa+1jSb6xoui8JDtbax9trd2dWUBxdlWdOZS/MMmbWmt7W2s3JHlPkheNUBcAAADo0CKs2XFWkusOvGit3ZVkV5KzquqkJA9fXj48P2uEuvdSVRcOt9ns2LNnz6pPCgAAAJiPRQg7Tkxyx4pjdyR58FCWFeUHylZb915aa5e11s5trZ17yimnHNEJAAAAAItjEbae3Zdkw4pjG5LcOZQdeH33irLV1gWAudm1e/+8uwAA0K1FmNmxM8nZB15U1fokmzJbi2NvktuWlw/Pd45QFwAAAOjQWm49u66qjk9yXJLjqur4qlqX5Ookj62qzUP565Nc31q7caj6/iRbq+qkYeHRlyS5cihbTV0AAACgQ2s5s2Nrkv1JXpNky/B8a2ttT5LNSS5JsjfJE5Kcv6zeGzJbdPTmJJ9Jcmlr7dokWU1dAAAAoE9rtmZHa+3izLaGva+yTyU58yBl9yS5YHiMWhcAAADozyKs2QEAAAAwmkXYjQUAmLOlbUvz7sIP2b5l+7y7AABMlJkdAAAAQFeEHQAAAEBXhB0AAABAV4QdAAAAQFeEHQAAAEBXhB0AAABAV4QdAAAAQFfWzbsDAKzOrt37592FH7K0bWneXQAA4BhmZgcAAADQFTM7AIBRZwhtOvWE0doCAPhRCDsA5mQRbz8BAIAeuI0FAAAA6IqZHQBA18ZaMHf7lu2jtAMAHH1mdgAAAABdEXYAAAAAXRF2AAAAAF2xZgcAwERZjwQA7puwAwBYSGN9kQcAjj3CDgCga7t27593FwCANWbNDgAAAKArwg4AAACgK25jAQBYY9YjAYCjy8wOAAAAoCvCDgAAAKArwg4AAACgK8IOAAAAoCvCDgAAAKArdmMBYHS7du+fdxcAADiGmdkBAAAAdEXYAQAAAHTFbSwAAHA/lrYtjdLO9i3bR2kHgEMTdgAAMIqxAoFEKADA6riNBQAAAOiKmR0AcJjsMsNYxhpLm049YZR2xuJvBIBFYWYHAAAA0BUzOwBgwsZcIwEAoBfCDgCAY1zPoVnP5wbAwQk7AOiedQQAAI4t1uwAAAAAumJmBwAATNBYt+hs37J9lHYAFomwAwCAbvW6zS8Ah+Y2FgAAAKArZnYAAAALY8wddNyiA8cuYQcAAKwRW+ECrA1hBwAAsGo9Bzk9Lwbb87lxbBN2AMCEjbX4IjA9/v4BDk7YAQAArJqdb2BtvO2aW0Zr66JnP2q0thaNsAMAWEh+tQa4bz3fMgRjEXYA
AABdGisUEL7C9Ag7AAAOg19SAY6+nrceHuvcnnzSO0Zpp3fCDgAAgAkZc6aJNVLolbADABiV6d4A/BSvytEAAAlTSURBVKgW8b8hZvZNk7ADAICF48sFTIu/WRaNsAMA4DAs4q+NY+n53AB6c/kXLxitrYue/anR2lo0wg4AgIkSUjAGv8gDPeo+7Kiqk5NcnuRXknw9yWtbax+ab68AADiUXoOcXs8LejbW363FYNdW92FHknck+W6S05Kck+SaqrqutbZzvt0a15iJvK2MAADuTUgBMC1dhx1VtT7J5iSPba3tS/LZqvr7JC9I8pq5dm5kY/4H+MknjdYUAAALbtGCnEXrz5h6vmVoEf9/6/nfN/ev67AjyRlJftBau2nZseuS/NKc+gMAAByjFjEQWMQ+jWXRzm3R+tO73sOOE5PcseLYHUkevPKNVXVhkguHl/uq6otHuW9je1hma5Ks2quycYxmYCyjjW1YMMY2PTKu6ZWxTZfqopri2D6sL6y9hx37kmxYcWxDkjtXvrG1dlmSy9aiU0dDVe1orZ07737A2IxtemVs0yPjml4Z2/Sq57H9gHl34Ci7Kcm6qjp92bGzk3S1OCkAAADw/7oOO1prdyW5Kskbq2p9VT0xyXOSfGC+PQMAAACOlq7DjsHLk5yQZHeSDyd5WW/bzg4mewsO3A9jm14Z2/TIuKZXxja96nZsV2tt3n0AAAAAGM2xMLMDAAAAOIYIOwAAAICuCDsmrqpOrqqrq+quqrq5qp437z7BkaqqB1XV5cMYvrOqPl9Vz1xW/tSqurGqvlNVn66qw9pbGxZJVZ1eVXdX1bZlx543jPu7qupjVXXyPPsIR6qqzq+qG4YxvKuqnjQcd91mkqrq0VX1yaraW1W3V9Xbq2rdUHZOVX1uGNefq6pz5t1fOJiqekVV7aiqe6rqyhVlB71GD5/L31tV3x7+Bl655p0fibBj+t6R5LtJTkvy/CTvqqqz5tslOGLrktya5JeSPCTJ65L87fCB42GZ7ar0uiQnJ9mR5CPz6iiswjuS/PuBF8O1+q+TvCCza/h3krxzPl2DI1dVv5zkrUlenOTBSX4xyZddt5m4d2a2scFPJTkns88mL6+qH0vy8STbkpyU5H1JPj4ch0X0tSRvTvLe5QcP4xp9cZLTk2xM8pQkr66qZ6xBf0dngdIJq6r1SfYmeWxr7abh2AeSfLW19pq5dg5WqaquT/InSR6a5EWttV8Yjq9P8vUkj2+t3TjHLsJhq6rzk5yX5AtJfra1tqWq3pLk0a215w3v2ZTkhiQPba3dOb/ewuGpqn9Ncnlr7fIVxy+M6zYTVVU3JLmotfbJ4fWlSTYk+bskVyR5RBu+QFXVLUkubK1dO6/+wv2pqjdnNm5fNLw+5DW6qr6a5MWttX8cyt+U5PTW2vlzOYFVMLNj2s5I8oMDQcfguiRmdjBpVXVaZuN7Z2bj+boDZa21u5LsinHORFTVhiRvTHLRiqKVY3tXZjP1zli73sGPpqqOS3JuklOq6ktV9ZVhuv8Jcd1m2v4yyflV9eNV9dNJnpnk2szG7/Xt3r8UXx/jmuk56DW6qk5K8vDl5Znw90thx7SdmOSOFcfuyGwqKUxSVT0wyQeTvG/4BdA4Z+relNmv37euOG5sM2WnJXlgkl9N8qTMpvs/PsnWGNtM22cy+2L37SRfyWyK/8diXNOPQ43lE5e9Xlk2OcKOaduX2bS65TYkMf2ZSaqqByT5QGa/br9iOGycM1nD4nVPS/IX91FsbDNl+4f//avW2m2tta8n+fMkz4qxzUQNn0P+IbP1DNYneVhm63O8NcY1/TjUWN637PXKsskRdkzbTUnWVdXpy46dndnUf5iUqqokl2f2a+Hm1tr3hqKdmY3rA+9bn2RTjHOm4clJHp3klqq6Pcmrkmyuqv/ID4/tn0nyoMyu7bDQWmt7M/vV+74Wf3PdZqpOTvLIJG9vrd3TWvtGZut0PCuz8fu44fPKAY+L
cc30HPQaPVzbb1tengl/vxR2TNhwf9VVSd5YVeur6olJnpPZL+MwNe9K8pgkS621/cuOX53ksVW1uaqOT/L6zO6ZtcgdU3BZZh8gzhke705yTZKnZ3a71lJVPWn4oPHGJFdZnJQJuSLJ71bVqcN93r+f5BNx3WaihhlK/5XkZVW1rqp+IslvZrZmwT8n+UGS3xu25jwwA/Wf5tJZuB/DGD4+yXFJjquq44dtlO/vGv3+JFur6qSqOjPJS5JcOYdTWDVhx/S9PMkJmW2R9eEkL2utTTJ549g17O390sy+DN5eVfuGx/Nba3uSbE5ySWa7Dz0hyeRWg+bY1Fr7Tmvt9gOPzKaH3t1a2zNcq387s9Bjd2b3w758jt2FI/WmzLZTvimznYQ+n+QS120m7rwkz0iyJ8mXknw/yR+01r6b5LlJXpjkW0kuSPLc4Tgsoq2Z3XL4miRbhudbD+Ma/YbMFiy9ObM1bC6d6o5Dtp4FAAAAumJmBwAAANAVYQcAAADQFWEHAAAA0BVhBwAAANAVYQcAAADQFWEHAAAA0BVhBwDQnap6UlV9cd79AADmo1pr8+4DAAAAwGjM7AAAulJV6+bdBwBgvoQdAMAkVNV/V9Vrq+oLVbW3qq6oquOr6slV9ZWq+qOquj3JFQeOLav7yKq6qqr2VNU3qurty8ouqKobhjb/oao2zuUEAYDRCDsAgCl5fpKnJ9mU5IwkW4fjP5nk5CQbk1y4vEJVHZfkE0luTvLoJD+d5G+Gsucm+eMk5yU5Jcm/JPnwUT4HAOAoE3YAAFPy9tbara21bya5JMlvDMf/N8kbWmv3tNb2r6jz80kenuQPW2t3tdbubq19dih7aZI/ba3d0Fr7fpK3JDnH7A4AmDZhBwAwJbcue35zZiFGkuxprd19kDqPTHLzEGastDHJX1bVt6rqW0m+maQym/0BAEyUBbwAgCl55LLnj0ryteH5obaXuzXJo6pq3X0EHrcmuaS19sER+wgAzJmZHQDAlPxOVT2iqk7ObK2NjxxGnX9LcluSP6uq9cOipk8cyt6d5LVVdVaSVNVDqurXjkrPAYA1I+wAAKbkQ0n+McmXh8eb769Ca+0HSZaS/GySW5J8JcmvD2VXJ3lrkr+pqm8n+c8kzzwqPQcA1ky1dqhZnwAAi6Gq/jvJb7XWPjXvvgAAi83MDgAAAKArwg4AAACgK25jAQAAALpiZgcAAADQFWEHAAAA0BVhBwAAANAVYQcAAADQFWEHAAAA0BVhBwAAANCV/wNQHMCBXHy8ygAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x23a17e7e320>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "shipping_fee_by_buyer = train.loc[df['shipping'] == 0, 'price']\n",
    "shipping_fee_by_seller = train.loc[df['shipping'] == 1, 'price']\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(18,8))\n",
    "ax.hist(shipping_fee_by_seller, color='#8CB4E1', alpha=1.0, bins=50, range = [0, 100],\n",
    "       label='Price when Seller pays Shipping')\n",
    "ax.hist(shipping_fee_by_buyer, color='#007D00', alpha=0.7, bins=50, range = [0, 100],\n",
    "       label='Price when Buyer pays Shipping')\n",
    "plt.xlabel('price', fontsize=12)\n",
    "plt.ylabel('frequency', fontsize=12)\n",
    "plt.title('Price Distribution by Shipping Type', fontsize=15)\n",
    "plt.tick_params(labelsize=12)\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The average price is 22.58 if seller pays shipping\n",
      "The average price is 30.11 if buyer pays shipping\n"
     ]
    }
   ],
   "source": [
    "print('The average price is {}'.format(round(shipping_fee_by_seller.mean(), 2)), 'if seller pays shipping');\n",
    "print('The average price is {}'.format(round(shipping_fee_by_buyer.mean(), 2)), 'if buyer pays shipping')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We compare the two price distributions again after log-transforming the price."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABDsAAAH4CAYAAABE9+afAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xm8VWW9+PHPV1BBUVPRbkoOmIAyHWTQ0KPHuDgPXRzSTMUhMafU7k8rr5YD6b2SmanXtBS9WiHmiFopioKaxKSFggphkJSgAjIpyPP7Y+1z3GdzDmzOBC0/79drv87e6xnWs9Y+5/Viffk+zxMpJSRJkiRJkvJio/U9AEmSJEmSpKZksEOSJEmSJOWKwQ5JkiRJkpQrBjskSZIkSVKuGOyQJEmSJEm5YrBDkiRJkiTlisEOSVLuRMQPIyIVvd6JiN9GxG5ltB1caNOuBcY5pmiMKyLi3YgYHRHnRsSmjRlXRHQq3IfPlVl/l0L/RxQdmxURw9btqurt/6yI+Godx5vsHGWOI0XEec3U98YRcXFE/CUilkbE/Ih4OSK+W1SnqjCGbmvpa3hETGimca72XTeXoutd46u5xyFJ+uxpvb4HIElSM1kIHFJ43xG4GhgdEV1TSkvW0O5x4MvA0mYeX7Vnge+T/QfEdkAV8N/AGRHxlZTSggaOqxPwA2A4sGDNVQGYW+h/WrkDX0dnAX8BHi45/h/Ae810zpZ2M3ASMBR4GfgcsA9wJHDdOvZ1NdC2SUf3qeb+rotNKpyr2r7AMGBQYRySJDULgx2SpLxamVL6Y+H9HyPib8BY4DBgZGnliGgFtEopzQPmtdwweb9onACPRMRdwIvAT4DTAJpzXBHRJqW0HPjjWis3sZTS5JY+Z3OIiM3IvqvLUkrXFxU9GBGxrv2llGY02eBW7/sjWui7TiktKj5XRLQvvJ2cUprVEmOQJH02OY1FkvRZMbHwcxf4dJpARHw1IqYCy4G965ouEhFtI+J/IuLtiPgoIv4aEdcWdx4RZ0bE1EL52xFxSUMHmlJ6lUKWQERsWei/rnF9LyLeiojlEfHPiPhdRPxbRFQBjxWq/bXQblZJP/0K02iWAf9vTVMbIuLyiPhHRCyOiPsiYquisjqn1xRPT4mIMUBv4NSiqQuDS+sVtT0+Iv5cuJezI2JoRLQuKq8+Z/eIeCoilkTEtIgYVOYt3iQifhoR70fEgoj4WURsUuh7m8L9PLVkTFH43m+op8/NgY2Bf5QWpJTqmqbRPiJGFu7pzIg4p+R8taaxFF1z34gYGxHLIuKNiPiPknZjIuKByKYNzSrUezwidiyqU++UpYi4KCLmRMQHEfGbKJkGFRE9IuLFwj2aGhGHFf6OhtdzX8oSEcdExMricRaOdymM9aDC5z9GxL0RcV5E/C2y6UKPRMS/lbTbLCJuiIi/F36PJkXEwMaMUZL0r8VghyTps2KXws9/lBz7H+BasoyPv5Y2iogAHgG+BdxSqPcDoH1Rnf8H/C/ZFI0jCu+vjsatDfEU2cPzXnUVRsQpZNNfbgAOLozvLbKH7knAfxaqDiKbRvAfJV38GhhVuJ5RaxjHicC/A98ELgYOB36xjtdyDtmUiScKY/ky2bSc1RQeakcUruFo4GeFa7m5juq/Ah4lu7Y3gd9ERIcyxvMdoAPZlJNryKbYDAVIKb0PPEQho6ZIFdnvy111dVjIvJkN/DAiBkXEFmsZwx3AK4WxjwFuiYh+ZYx9BNnv4yDgz8DIiOhZUufLwPlk39cZQA9Wnz5Ul+OBAWT341Ky3+UfVRdGlr3ye7LpNSeS3bufADuV0ffaPEo2nenkkuOnAXOAp4uOHQicDnwbGAL0oyhbq+hv9uvAlWTTiP4CPB4RezTBWCVJ/wKcxiJJyq2ibICOwK3Ah9R+aNoW+PeU0pSiNqXdHAQMBI5OKT1adPyeQv0tyYIf16SUriyUPVV4
MPyviPjflNInDRj+nMLPz9dT3g/4Q0rp1qJjDxZdx/TC2/qmC9yUUvppUf1d6jlPW+DwlNLiQr0lwP9FxB4ppdfXdhEAKaXXCu3mlUzZqctVwJiUUnVmxe8K38m1EXFNSmlOUd2fpJTuLIxrIvBPsgf029Zyjg+B41JKq4AnI1sM9rKIuLYQ7Pgl8IeI6JhSmllocxowMaX05zX0Oxj4DfBbYFVETC58viml9HFJ3V+nlK4pjH0M2QP5IGD8Wsb+i5RSdcbM74HXgO8BJxTV2R7on1J6u1DvbWBcRBySUvrdGvpeAXw1pbSy0G7PQr/VWSenkf3N9Ekp/b1QZwbZ+iSNklJaERH/R3YPryv03Yos+HFn4buq1r4whrmFeu8AT0dEVUppDFkA79+BfVJK1WP7Q0R0JgsQlgZUJEk5ZGaHJCmvtiV7eFsBTCcLeHyt+gGp4O/FgY56fIVsXY1H6yn/Mlk2xciIaF39Ap4hC1SUk2lQl7Wt8zAFOCwiroxsSkqrdey/zsyKOjxVHegoeLAwtr7reL61KlzDXqy+psoIsn+zfLnk+B+q36SU3gPepbz7/UjJw/ODZEGd6h1SRgNvA6cWxrUFWSCizqyOojE8A+xGlvVwJ9nv4PXAMxFR+m+u4rGvIMtMKWfsDxW1W0WWwVCaETKpOtBRqPcC2b1ZW+bIs9WBjoLXgO2rp/iQfecTqwMdhb7HkwWZmsIvgc4RUf09Hwx8gWyR3WJ/LP47TimNBhbx6fX9OzALmFjyNzka6NNEY5UkbeAMdkiS8moh2cNZH7KHyF1SSk+W1CnnIW1b1rxrRPV0lql8GlxZQbbLCsAXyx1wieq1C+ob451k/0t9PNn/rP8zIq5eh6BHuQ+o7xZ/SCktAxaTPYQ2tfZkU3dKx1b9eZuS46W7zHwMtCnjPO/W8/kLULPGxl1ka4wE2T1uTTZtZo1SSh+mlH6TUvomn+4CtC9Z5kZzjb30uyitU1+9UnWNKYDqYMe/UfciuU2ycG4hU+glPp1CdBrwfErprZKqa7u+9mRTjlaUvL5Hw/8eJUn/YpzGIknKq5UppQlrqVPXwpGl3mPND4nvF34eQd0BhOl1HCvHQWQPaBPrKiz8r/5PgJ9ExBf5dMvTv7P2aRxQ3rVDNiWiRkS0BdrxaQBoeeHnJtS2dZn9F5tPds3blxyvnsrzPk2jtP/qz8VBrbvIpicdSDa14uGU0gfrcpKUUoqI64HLgS5kWRiNtT21t+rdntWDcaXXV1+9dfUPoHMdx7drZL/FfkH2O1291sbZddRZ2/W9T7b+zvF11FtVxzFJUg6Z2SFJ0pqNBraJOnYpKXgJWAbskFKaUMfrw3U9YUT0AM4F7i2nfUppdkrpOrIFSvcsHK5eI6KcbIE1GRi1d1oZRBYoqQ4kVa+hUbPwY0TsDWxZ0s9aMxcKa5tMBI4rKTqe7CH1pXUaef2OLplWMojsO/xL0Vhmk001uRLYj7VMYYmIjUt3LinYvfCzqaZ61Cw0W7iGo1l9nY+9ImKnonr7kgUD1rYeyNr8CehTsrNLP+pfV6Yh7if79+lvyAJfD9RRZ5+IqAlARsQAst+36usbTZbN9UEdf4+TmnCskqQNmJkdkiSt2VNkO1D8KiKuItsl5AvA/imlISmlBRHxQ+CnEbEz8DzZw1on4MCUUukuKKW2iYh9Cm22Jcsk+CbwBtluGnWKiJ+T/Q/2H8mm7BxI9mB9aaFKdUbJkIj4DbB0LYtr1mcZ2S4W15Nd9/XAQyml1wrl48mySW6KiMvJpppcQraGQrFpwMERcTBZZsJfC+tslPoB8PuIuIvsgbc72VSQO0oWJ22MLcjWWLkD6ApcAdxcWJy02C/J1g+ZQ/Z7sCZbAW9ExN1kU5gWkmVBfI/s/jy0hrbr4syI+JgsMPNN4Etka4QUexcYVfi9bAP8N9k6HmtanLQcdwH/Vej7SrJ1Tq4km8bSJBkTKaXFETGCbBeZu0rWi6k2n+x38iqy
LKPrgRcLi5NCtrvQ88DoiPhv4HXgcxR2NkopXdEUY5UkbdgMdkiStAaFqQj/QfbAfSFZyv47FK3fkFL6n8KOEBeRbWu6nCxYMaKMUxxIlrGwkmzNhD8D3yXbdeOjNbR7iexhdwjZA+1bwDdTSg8XxvR2RPwncAHZNqRz+HT73XXxG7LdS35J9mD5KNk2txTO83Hh/txK9r/w0wvl95X0cw3ZFqX3k/0v/GmsvvAkKaU/RMQJZA/VJ5E9uP+YLAjSVH5Mtp7Gr8mCTL8gW/+k1Ciy7+XukgVN67KIbBvjw8i2PN2SLMjxe7KdehY2zdA5gWz60jVk3+nXUkqTS+q8RLbr0I1kv69jyLaTbZSU0tKIOIRsa+URZIuAXkJ23aXBrcZ4mCzYcWc95c+SBdluJgsQPk32d1A9zlWFTKwrgP9HluXxHjAZ+OlqvUmScimyNbgkSZJULCIOIwt4dKpjkcyWHstgssyKLerJdqiuNwaYn1I6toXGtStZYO+slNIap/qsQ583AYeklDrVUfZH4K2U0jea4lySpPwys0OSJKlIROxANiXoOuCJ9R3o2JBExPfIMpveJsvU+R7ZNJbfNkHfXci2/z2TLGNEkqQGM9ghSZJU21lk02gmkU0B0qcS2ZSiHYCPgLHAf6aUmmIay3CgJ1ngpJwdhSRJqpfTWCRJkiRJUq649awkSZIkScoVgx2SJEmSJClXXLOjDu3bt0+77LLL+h6GJEmSJEkqMnHixPkppe3WVs9gRx122WUXJkyYsL6HIUmSJEmSikTE2+XUa5FpLBGxuOT1SUT8rKh8QERMi4ilEfFsROxcVLZpRNwZEYsi4h8RcXFJ3w1uK0mSJEmS8qdFgh0ppXbVL+DzwDJgJEBEtAceBC4HtgEmACOKmv+QbK/7nYEDgUsi4pDGtpUkSZIkSfm0PhYoPRZ4l2xfdoBBwNSU0siU0nKyAEXPiOhSKD8FuDql9EFK6XXgDmBwE7SVJEmSJEk5tD7W7DgVuCellAqfuwKvVBemlJZExAyga0T8E9ihuLzw/qtN0FaSJEmSNigrVqxgzpw5LF++fH0PRVqv2rRpQ4cOHdh4440b1L5Fgx0RsRNwAHBG0eF2wLySqguBLQpl1Z9LyxrbtnRsZwFnAey0005ruRJJkiRJanpz5sxhiy22YJdddiEi1vdwpPUipcR7773HnDlz2HXXXRvUR0tPYzkFGJdS+mvRscXAliX1tgQ+LJRRUl5d1ti2taSUbk8p9Ukp9dluu7XuYiNJkiRJTW758uVsu+22Bjr0mRYRbLvtto3KcFofwY67S45NBXpWf4iIzYHdyNbi+ACYW1xeeD+1CdpKkiRJ0gbHQIfU+L+DFgt2RER/YEcKu7AUeQjoFhHHREQb4Arg1ZTStEL5PcB/RcTWhYVHvwkMb4K2kiRJkqQSrVq1oqKigm7dunHcccexdOnSOusddthhLFiwoFnHMmbMGI444ogm73fVqlVccMEFdOvWje7du9O3b1/++te/rrFNVVUVEyZMAGCXXXZh/vz5TT6u5jB06FC6du1Kjx49qKio4OWXXwbqv4ZHH32U6667rsHn69+/f4PbNqWWXLPjVODBlFKtaSQppXkRcQxwM3Av8DJwQlGVHwD/C7xNtmXtf6eUftfYtpIkSZK0ofvx439r0v6+c/ja1yds27YtU6ZMAeCkk07itttu4+KLL64pTymRUuKJJ55o0rG1pBEjRvDOO+/w6quvstFGGzFnzhw233zzZjvfJ598QqtWrZqt//q89NJLjBo1ikmTJrHpppsyf/58Pv744zW2OeqoozjqqKMafM4XX3yxwW2bUotldqSUhqSUTq6n7OmUUpeUUtuUUlVKaVZR2UcppdNTSlumlD6fUrqhqdpKkiRJkupXWVnJW2+9xaxZs9hjjz0455xz2GuvvZg9e3atzIB77rmHHj160LNnT04+OXvsmzdvHscccwx9+/alb9++vPDCC6v1f9hhh/Hq
q68C0KtXL6666ioALr/8cn7xi18AsHjxYo499li6dOnCSSedRPXGnhMnTuSAAw6gd+/eHHzwwcydOxfIMjAuvfRS+vXrR6dOnRg7duxq5507dy5f+MIX2Gij7JG4Q4cObL311gD84Q9/4Mtf/jJ77bUXxx13HIsXL16tfbF7772Xfv36UVFRwZAhQ/jkk08AaNeuHVdccQV77703L730Uq02VVVVXHjhhfTv359u3boxfvx4AMaPH0///v3p1asX/fv3Z/r06TXfQ3UACmDffffl1Vdf5bnnnqOiooKKigp69erFhx/WXqJy7ty5tG/fnk033RSA9u3bs8MOO9SU/+xnP2Ovvfaie/fuTJuWTZAYPnw45513HgCDBw/m7LPPprKykk6dOjFq1KiaOkcffTSHHHIInTt35sorr6zps127bK+QMWPGUFVVVed398QTT9ClSxf2228/LrjggmbJ3mnpNTskSZIkSf8CVq5cyZNPPkn37t0BmD59OqeccgqTJ09m5513rqk3depUhg4dyjPPPMMrr7zCT3/6UwC+/e1vc9FFF/GnP/2J3/72t5x55pmrnWP//fdn7NixLFq0iNatW9cERMaNG0dlZSUAkydP5sYbb+S1115j5syZvPDCC6xYsYLzzz+fBx54gIkTJ3L66adz2WWX1Rr7+PHjufHGG2s9iFc7/vjjeeyxx6ioqOA73/kOkydPBmD+/Plcc801PP3000yaNIk+ffpwww31/5/566+/zogRI3jhhReYMmUKrVq14r777gNgyZIldOvWjZdffpn99ttvtbZLlizhxRdf5NZbb+X0008HoEuXLjz//PNMnjyZq666iu9///sAnHnmmQwfPhyAN954g48++ogePXowbNgwbrnlFqZMmcLYsWNp27ZtrXMcdNBBzJ49m06dOnHOOefw3HPP1Spv3749kyZN4lvf+hbDhg2r8xpnzZrFc889x+OPP87ZZ59ds2jo+PHjue+++5gyZQojR46smeJTrK7vbvny5QwZMoQnn3yScePGMW9e6QarTaNFt56VJEmSJG3Yli1bRkVFBZBlFJxxxhm888477Lzzzuyzzz6r1X/mmWc49thjad++PQDbbLMNAE8//TSvvfZaTb1Fixbx4YcfssUWW9Qcq6ys5KabbmLXXXfl8MMP56mnnmLp0qXMmjWLzp07M3fuXPr160eHDh0AqKioYNasWXzuc5/jL3/5CwMHDgSyaSJf+MIXavodNGgQAL1792bWrFmrjblDhw5Mnz6dZ555hmeeeYYBAwYwcuRIli1bxmuvvca+++4LwMcff8yXv/zleu/V6NGjmThxIn379q25d9tvvz2QrX1yzDHH1Nv2xBNPBLKAz6JFi1iwYAEffvghp556Km+++SYRwYoVKwA47rjjuPrqq7n++uu58847GTx4MJBleFx88cWcdNJJDBo0qOY+VWvXrh0TJ05k7NixPPvss3zta1/juuuuq2lffJ8efPDBOsd5/PHHs9FGG7H77rvTsWPHmgyQgQMHsu2229b0M27cOPr06VOrbV3fXbt27ejYsWPNlrInnngit99+e733qaEMdkiSJEmSahSv2VGsvjUtUkp17pyxatUqXnrppdWyDYr17duXCRMm0LFjRwYOHMj8+fO544476N27d02d6ikYkAUQVq5cSUqJrl27rjY9pLRNdf366hx66KEceuihfP7zn+fhhx/moIMOYuDAgfz617+ud8yl137qqady7bXXrlbWpk2bNa7TUXrPIoLLL7+cAw88kIceeohZs2ZRVVUFwGabbcbAgQN55JFHuP/++2uyKL773e9y+OGH88QTT7DPPvvw9NNP06VLl1r9tmrViqqqKqqqqujevTt33313TbCjnPtU1zjXdLxYfd9dS3AaiyRJkiSpwQYMGMD999/Pe++9B8D7778PZFMobr755pp6dQVQNtlkE774xS9y//33s88++1BZWcmwYcNqprDUp3Pnzsyb
N68m2LFixQqmTp1a9pgnTZrEO++8A2RBmVdffbUmc+WFF17grbfeAmDp0qW88cYba7z2Bx54gHfffbfm2t9+++2yxjBixAggm7Kz1VZbsdVWW7Fw4UJ23HFHgJppK9XOPPNMLrjgAvr27VuTPTNjxgy6d+/OpZdeSp8+fWqyLqpNnz6dN998s+bzlClTak1BKsfIkSNZtWoVM2bMYObMmXTu3BmAp556ivfff59ly5bx8MMP12TDrE2XLl2YOXNmTcZN9X1oamZ2SJIkSZIarGvXrlx22WUccMABtGrVil69ejF8+HBuuukmzj33XHr06MHKlSvZf//9ue2221ZrX1lZyejRo9lss82orKxkzpw5aw12bLLJJjzwwANccMEFLFy4kJUrV3LhhRfStWvXssb87rvv8s1vfpOPPvoIyKZbnHfeebRp04bhw4dz4okn1pRdc801dOrUqc5+9txzT6655hoOOuggVq1axcYbb8wtt9xSVkBh6623pn///ixatIg777wTgEsuuYRTTz2VG264ga985Su16vfu3Zstt9yS0047rebYjTfeyLPPPkurVq3Yc889OfTQQ2u1Wbx4Meeffz4LFiygdevWfOlLX1rnKSOdO3fmgAMO4J///Ce33XYbbdq0AWC//fbj5JNP5q233uLrX//6alNY6tO2bVtuvfVWDjnkENq3b0+/fv3WaTzlipZKIflX0qdPn1TX4iqSJEmS1Jxef/119thjj/U9DDWzqqoqhg0bVnaAAOCdd96hqqqKadOm1ewi09wGDx7MEUccwbHHHlvr+PDhw5kwYUKtzJ11sXjxYtq1a0dKiXPPPZfdd9+diy66aLV6df09RMTElNJab5zTWCRJkiRJ2oDdc8897L333gwdOrTFAh3N6Y477qCiooKuXbuycOFChgwZ0uTnMLOjDmZ2SJIkSVofzOyQPmVmhyRJkiRJUoELlEqSAPjx439rkn6+c/hOTdKPJEmS1FBmdkiSJEmSpFwx2CFJkiRJknLFYIckSZIkqUarVq2oqKigW7duHHfccSxdurTOeocddhgLFixo1rGMGTOGI444osn7nTVrFm3btqWiooKePXvSv39/pk+f3uTn2VAMHTqUrl270qNHDyoqKnj55ZcB2GWXXZg/f/5q9R999FGuu+66Bp+vf//+DW7bVFyzQ5IkSZI2UEfee2ST9vfYNx5ba522bdsyZcoUAE466SRuu+02Lr744prylBIpJZ544okmHVtL22233Wqu8+c//zk/+tGPuPvuu5vtfJ988gmtWrVqtv7r89JLLzFq1CgmTZrEpptuyvz58/n444/X2Oaoo47iqKOOavA5X3zxxQa3bSpmdkiSJEmS6lRZWclbb73FrFmz2GOPPTjnnHPYa6+9mD17dq2sgHvuuYcePXrQs2dPTj75ZADmzZvHMcccQ9++fenbty8vvPDCav0fdthhvPrqqwD06tWLq666CoDLL7+cX/ziFwAsXryYY489li5dunDSSSeRUgJg4sSJHHDAAfTu3ZuDDz6YuXPnAlBVVcWll15Kv3796NSpE2PHjl3rdS5atIitt94agOHDh3PeeefVlB1xxBGMGTOGX/7yl1x00UU1x++4446aINC9995Lv379qKioYMiQIXzyyScAtGvXjiuuuIK9996bl156qdY5q6qquPDCC+nfvz/dunVj/PjxAIwfP57+/fvTq1evWhknlZWVNcEZgH333ZdXX32V5557joqKCioqKujVqxcffvhhrfPMnTuX9u3bs+mmmwLQvn17dthhh5ryn/3sZ+y11150796dadOmrXYPBg8ezNlnn01lZSWdOnVi1KhRNXWOPvpoDjnkEDp37syVV15Z02e7du2ALDOnqqqqzu/viSeeoEuXLuy3335ccMEFTZ7BY7BDkiRJkrSalStX8uSTT9K9e3cApk+fzimnnMLkyZPZeeeda+pNnTqVoUOH8swzz/DKK6/w05/+FIBvf/vb
XHTRRfzpT3/it7/9LWeeeeZq59h///0ZO3YsixYtonXr1jUBkXHjxlFZWQnA5MmTufHGG3nttdeYOXMmL7zwAitWrOD888/ngQceYOLEiZx++ulcdtlltcY+fvx4brzxxloP4cVmzJhBRUUFu+22GzfccEOt7JW6nHDCCTz66KOsWLECgLvuuovTTjuN119/nREjRvDCCy8wZcoUWrVqxX333QfAkiVL6NatGy+//DL77bffan0uWbKEF198kVtvvZXTTz8dgC5duvD8888zefJkrrrqKr7//e8DcOaZZzJ8+HAA3njjDT766CN69OjBsGHDuOWWW5gyZQpjx46lbdu2tc5x0EEHMXv2bDp16sQ555zDc889V6u8ffv2TJo0iW9961sMGzaszmufNWsWzz33HI8//jhnn302y5cvB7LAzH333ceUKVMYOXIkEyZMWK1tXd/f8uXLGTJkCE8++STjxo1j3rx5a7z3DWGwQ5IkSZJUY9myZVRUVNCnTx922mknzjjjDAB23nln9tlnn9XqP/PMMxx77LG0b98egG222QaAp59+mvPOO4+KigqOOuooFi1atFrWQWVlJc8//zzjxo3j8MMPZ/HixSxdupRZs2bRuXNnAPr160eHDh3YaKONqKioYNasWUyfPp2//OUvDBw4kIqKCq655hrmzJlT0++gQYMA6N27N7NmzarzOqunscyYMYMbb7yRs846a433ZfPNN+crX/kKo0aNYtq0aaxYsYLu3bszevRoJk6cSN++famoqGD06NHMnDkTyNY/OeaYY+rt88QTTwSyoM+iRYtYsGABCxcu5LjjjqNbt25cdNFFTJ06FYDjjjuOUaNGsWLFCu68804GDx4MZBkeF198MTfddBMLFiygdevaq1W0a9eOiRMncvvtt7Pddtvxta99rSZoUu69Ov7449loo43Yfffd6dixY00GyMCBA9l2221p27YtgwYNYty4cau1rev7mzZtGh07dmTXXXetdR+akmt2SJIkSZJqFK/ZUWzzzTevs35KiYhY7fiqVat46aWXVss0KNa3b18mTJhAx44dGThwIPPnz+eOO+6gd+/eNXWqp19AFjxYuXIlKSW6du262tSQ0jbV9dfmqKOO4rTTTgMgPKPzAAAgAElEQVSgdevWrFq1qqasOosBsuyKH/3oR3Tp0qWmfkqJU089lWuvvXa1ftu0abPGdTpK71tEcPnll3PggQfy0EMPMWvWLKqqqgDYbLPNGDhwII888gj3339/TRbFd7/7XQ4//HCeeOIJ9tlnH55++mm6dOlSq99WrVpRVVVFVVUV3bt35+67764JlpRzr+oa55qOF6vv+2tuZnZIkiRJkhpswIAB3H///bz33nsAvP/++0A2feLmm2+uqVdXAGWTTTbhi1/8Ivfffz/77LMPlZWVDBs2rGYKS306d+7MvHnzaoIdK1asqMmAaIhx48ax2267AdkOJVOmTGHVqlXMnj27Zi0NgL333pvZs2fzq1/9qiYbYcCAATzwwAO8++67Ndf/9ttvl3XeESNG1Jx/q622YquttmLhwoXsuOOOALUyMCALtlxwwQX07du3JoNmxowZdO/enUsvvZQ+ffrUZF1Umz59Om+++WbN5ylTptSahlSOkSNHsmrVKmbMmMHMmTNrsm6eeuop3n//fZYtW8bDDz/MvvvuW1Z/Xbp0YebMmTWZJNX3oSmZ2SFJkiRJarCuXbty2WWXccABB9CqVSt69erF8OHDuemmmzj33HPp0aMHK1euZP/99+e2225brX1lZSWjR49ms802o7Kykjlz5qw12LHJJpvwwAMPcMEFF7Bw4UJWrlzJhRdeSNeuXcsed/WaHSklNtlkk5oFUffdd1923XVXunfvTrdu3dhrr71qtTv++OOZMmVKzYKme+65J9dccw0HHXQQq1atYuONN+aWW24pK6Cw9dZb079/fxYtWsSdd94JwCWXXMKpp57KDTfcwFe+8pVa9Xv3
7s2WW25Zk1UCcOONN/Lss8/SqlUr9txzTw499NBabRYvXsz5559fM8XlS1/6ErfffnvZ9wmy4NIBBxzAP//5T2677TbatGkDwH777cfJJ5/MW2+9xde//nX69OlTVn9t27bl1ltv5ZBDDqF9+/b069dvncZTjmiJ9JF/NX369El1LawiSXn248f/1iT9fOfwnZqkH0mSPotef/119thjj/U9DK3BEUccwUUXXcSAAQMa1U9VVRXDhg0rO0AA8M4771BVVcW0adPYaKOWmagxePBgjjjiCI499thax4cPH86ECRNqZe+si8WLF9OuXTtSSpx77rnsvvvutXa7gbr/HiJiYkpprTfNaSySJEmSJK3FggUL6NSpE23btm10oKMh7rnnHvbee2+GDh3aYoGO5nTHHXdQUVFB165dWbhwIUOGDGnS/s3sqIOZHZI+i8zskCRp/TOzQ/qUmR2SJEmSJEkFBjskSZIkaQNi9r3U+L8Dgx2SJEmStIFo06YN7733ngEPfaallHjvvfdqdn1pCLeelSRJkqQNRIcOHZgzZw7z5s1b30OR1qs2bdrQoUOHBrc32CFJkiRJG4iNN96YXXfddX0PQ/qX5zQWSZIkSZKUKwY7JEmSJElSrhjskCRJkiRJuWKwQ5IkSZIk5YrBDkmSJEmSlCsGOyRJkiRJUq649awkaYN15L1HNkk/j33jsSbpR5IkSf8azOyQJEmSJEm5YrBDkiRJkiTlisEOSZIkSZKUKwY7JEmSJElSrrhAqSQJgF9OP71J+hnzQVsXBJUkSdJ6ZWaHJEmSJEnKFYMdkiRJkiQpVwx2SJIkSZKkXDHYIUmSJEmScsVghyRJkiRJyhWDHZIkSZIkKVcMdkiSJEmSpFwx2CFJkiRJknLFYIckSZIkScqV1ut7AJIkqeGOvPfIJunnsW881iT9SJIkbQjM7JAkSZIkSbnSosGOiDghIl6PiCURMSMiKgvHB0TEtIhYGhHPRsTORW02jYg7I2JRRPwjIi4u6bPBbSVJkiRJUv60WLAjIgYC/w2cBmwB7A/MjIj2wIPA5cA2wARgRFHTHwK7AzsDBwKXRMQhhT4b3FaSJEmSJOVTS2Z2XAlclVL6Y0ppVUrp7ymlvwODgKkppZEppeVkAYqeEdGl0O4U4OqU0gcppdeBO4DBhbLGtJUkSZIkSTnUIsGOiGgF9AG2i4i3ImJORNwcEW2BrsAr1XVTSkuAGUDXiNga2KG4vPC+a+F9Y9pKkiRJkqQcaqnMjs8DGwPHApVABdAL+C+gHbCwpP5Csqku7Yo+l5bRyLa1RMRZETEhIibMmzevvKuSJEmSJEkbnJYKdiwr/PxZSmluSmk+cANwGLAY2LKk/pbAh4UySsqry2hk21pSSrenlPqklPpst912ZV2UJEmSJEna8LRIsCOl9AEwB0h1FE8FelZ/iIjNgd3I1uL4AJhbXF54P7UJ2kqSJEmSpBxqyQVK7wLOj4jtC+tpXAiMAh4CukXEMRHRBrgCeDWlNK3Q7h7gvyJi68LCo98EhhfKGtNWkiRJkiTlUEsGO64G/gS8AbwOTAaGppTmAccAQ4EPgL2BE4ra/YBs0dG3geeA61NKvwNoTFtJkiRJkpRPrVvqRCmlFcA5hVdp2dNAl9UaZWUfAacXXnWVN7itJEmSJEnKn5bM7JAkSZIkSWp2BjskSZIkSVKuGOyQJEmSJEm5YrBDkiRJkiTlisEOSZIkSZKUKwY7JEmSJElSrhjskCRJkiRJuWKwQ5IkSZIk5YrBDkmSJEmSlCsGOyRJkiRJUq4Y7JAkSZIkSblisEOSJEmSJOWKwQ5JkiRJkpQrBjskSZIkSVKuGOyQJEmSJEm5YrBDkiRJkiTlisEOSZIkSZKUKwY7JEmSJElSrhjskCRJkiRJuWKwQ5IkSZIk5Urr9T0ASZI+i46898j1PQRJkqTcMrNDkiRJkiTlisEOSZIkSZKUKwY7JEmSJElSrhjskCRJkiRJueIC
pZKkJufim5IkSVqfzOyQJEmSJEm5YrBDkiRJkiTlisEOSZIkSZKUKwY7JEmSJElSrhjskCRJkiRJueJuLJIklcldZiRJkv41mNkhSZIkSZJyxWCHJEmSJEnKFYMdkiRJkiQpVwx2SJIkSZKkXDHYIUmSJEmScsXdWCRJuecuKpIkSZ8tZnZIkiRJkqRcMdghSZIkSZJyxWCHJEmSJEnKFYMdkiRJkiQpVwx2SJIkSZKkXDHYIUmSJEmScsVghyRJkiRJyhWDHZIkSZIkKVcMdkiSJEmSpFwx2CFJkiRJknLFYIckSZIkScoVgx2SJEmSJClXDHZIkiRJkqRcMdghSZIkSZJypcWCHRExJiKWR8Tiwmt6UdnXI+LtiFgSEQ9HxDZFZdtExEOFsrcj4usl/Ta4rSRJkiRJyp+Wzuw4L6XUrvDqDBARXYGfAycDnweWArcWtbkF+LhQdhLwv4U2jWorSZIkSZLyqfX6HgBZEOKxlNLzABFxOfB6RGwBrAKOAbqllBYD4yLiUbLgxncb2VaSJEmSJOVQS2d2XBsR8yPihYioKhzrCrxSXSGlNIMsG6NT4fVJSumNoj5eKbRpbNtaIuKsiJgQERPmzZvXiEuUJEmSJEnrU0sGOy4FOgI7ArcDj0XEbkA7YGFJ3YXAFmspo5Fta0kp3Z5S6pNS6rPddtuVe02SJEmSJGkD02LTWFJKLxd9vDsiTgQOAxYDW5ZU3xL4kGwqSn1lNLKtJEmSJEnKofW59WwCApgK9Kw+GBEdgU2BNwqv1hGxe1G7noU2NLKtJEmSJEnKoRYJdkTE5yLi4IhoExGtI+IkYH/g98B9wJERURkRmwNXAQ+mlD5MKS0BHgSuiojNI2Jf4Gjg/wpdN6atJEmSJEnKoZbK7NgYuAaYB8wHzge+mlKanlKaCpxNFrh4l2xNjXOK2p4DtC2U/Rr4VqENjWkrSZIkSZLyqUXW7EgpzQP6rqH8V8Cv6il7H/hqc7SVJEmSJEn5sz7X7JAkSZIkSWpyBjskSZIkSVKuGOyQJEmSJEm5YrBDkiRJkiTlisEOSZIkSZKUKwY7JEmSJElSrhjskCRJkiRJuWKwQ5IkSZIk5YrBDkmSJEmSlCsGOyRJkiRJUq4Y7JAkSZIkSblisEOSJEmSJOWKwQ5JkiRJkpQrBjskSZIkSVKuGOyQJEmSJEm50np9D0CSpOY2491lTdLPbtu3bZJ+JEmS1LzM7JAkSZIkSbliZockSevBhpZtcuS9RzZJPwCPfeOxJutLkiSpIczskCRJkiRJuWKwQ5IkSZIk5YrBDkmSJEmSlCsGOyRJkiRJUq4Y7JAkSZIkSblisEOSJEmSJOWKwQ5JkiRJkpQrBjskSZIkSVKuGOyQJEmSJEm5YrBDkiRJkiTlisEOSZIkSZKUKwY7JEmSJElSrhjskCRJkiRJuWKwQ5IkSZIk5YrBDkmSJEmSlCsGOyRJkiRJUq4Y7JAkSZIkSblisEOSJEmSJOWKwQ5JkiRJkpQrBjskSZIkSVKuGOyQJEmSJEm5YrBDkiRJkiTlisEOSZIkSZKUKwY7JEmSJElSrhjskCRJkiRJuWKwQ5IkSZIk5YrBDkmSJEmSlCsGOyRJkiRJUq4Y7JAkSZIkSblisEOSJEmSJOWKwQ5JkiRJkpQrBjskSZIkSVKuGOyQJEmSJEm50uLBjojYPSKWR8S9Rce+HhFvR8SSiHg4IrYpKtsmIh4qlL0dEV8v6a/BbSVJkiRJUv6sj8yOW4A/VX+IiK7Az4GTgc8DS4FbS+p/XCg7CfjfQptGtZUkSZIkSfnUuiVPFhEnAAuAF4EvFQ6fBDyWUnq+UOdy4PWI2AJYBRwDdEspLQbGRcSjZMGN7zayrSRJkiRJyqGygh0R0Sql9EljThQRWwJXAQOAM4qKupIFPwBIKc2IiI+BTmQBi09SSm8U1X8FOKAJ2kqSmsGMd5c1WV+7bd+2yfqSJEnSZ0e501jmRsRPI6JPI851NfDL
lNLskuPtgIUlxxYCW6ylrLFta4mIsyJiQkRMmDdv3louRZIkSZIkbajKDXYcCnwCPBYRr0fE9yNip3JPEhEVwL8DP6mjeDGwZcmxLYEP11LW2La1pJRuTyn1SSn12W677eq/GEmSJEmStEEraxpLSmkiMDEi/hM4CPgG8OeImAT8HzAipbRkDV1UAbsAf4sIyLIuWkXEnsDvgJ7VFSOiI7Ap8AbZVJTWEbF7SunNQpWewNTC+6mNaCtJkiRJknJonXZjSSmtAqYVXvOAHckWCZ0dESevoentwG5AReF1G/A4cDBwH3BkRFRGxOZk63o8mFL6sBBAeRC4KiI2j4h9gaPJAiw0sq0kSZIkScqhsoIdEbF1RAyJiHHARLIgxykppU4ppQFkQYub6mufUlqaUvpH9YtsisnylNK8lNJU4GyywMW7ZGtqnFPU/BygbaHs18C3Cm1oTFtJkiRJkpRP5W49Owd4liyg8UhK6aPiwpTSnyLikXJPmlL6YcnnXwG/qqfu+8BX19BXg9tKkiRJkqT8KTfY0TGl9M81VUgpDW78cCRJkiRJkhqn3DU7TouIvsUHIqJfRFzSDGOSJEmSJElqsHKDHd8GXis59hpwYdMOR5IkSZIkqXHKDXZsAqwoOfYx0KZphyNJkiRJktQ45QY7JlJ7lxPIdkGZ1LTDkSRJkiRJapxyFyi9CHgqIk4GZgBfAj4PDGyugUmSJEmSJDVEWcGOlNLUiOgEHAl0AB4ERqWUFjfn4CRJkiRJktZVuZkdFAIbv27GsUiSJEmSJDVaWcGOiNgVGApUAO2Ky1JKOzXDuCRJkiRJkhqk3MyOX5Gt1fEdYGnzDUeSJEmSJKlxyg12dAX2TSmtas7BSJIkSZIkNVa5W88+D/RqzoFIkiRJkiQ1hXIzO2YBv4+IB4F/FBeklK5o6kFJkiRJkiQ1VLnBjs2Bx4CNgS8233AkSZIkSZIap6xgR0rptOYeiCRJkiRJUlMoN7ODiNgDOBb4fErpvIjoDGyaUnq12UYnSZIkSZK0jspaoDQijiNbpHRH4JTC4S2AG5ppXJIkSZIkSQ1S7m4sVwEDU0pnA58Ujr0C9GyWUUmSJEmSJDVQucGO7cmCGwCp6Gequ7okSZIkSdL6UW6wYyJwcsmxE4DxTTscSZIkSZKkxil3gdILgD9ExBnA5hHxe6ATcFCzjUySJEmSJKkByt16dlpEdAGOAEYBs4FRKaXFzTk4SZIkSZKkdVX21rMppaXA/c04FkmSJEmSpEYrK9gREWOpZzHSlNL+TToiSZIkSZKkRig3s+MXJZ//DTgDuLdphyNJkiRJktQ45a7ZcXfpsYj4LXAXcFVTD0qSJEmSJKmhyt16ti5/B3o01UAkSZIkSZKaQrlrdpxecmgzYBDwxyYfkSRJkiRJUiOUu2bHySWflwAvAj9p2uFIkiRJkiQ1TrlrdhzY3AORJEmSJElqCuVOY+lYTr2U0szGDUeSJEmSJKlxyp3G8haQCu+j6H31ZwrHWjXRuCRJkiRJkhqk3N1YzgB+A3QB2hR+/go4I6W0UeFloEOSJEmSJK135WZ2XA3snlJaVvj8ZkQMAd4AhjfHwCRJkiRJkhqi3MyOjYBdSo7tjNNWJEmSJEnSBqbczI6fAM9ExF3AbOCLwGDcelaSJEmSJG1gyt169vqI+DNwHNALmAucnlL6XXMOTpIkSZIkaV2Vm9lBIbBhcEOSJK3Rkfce2ST9PPaNx5qkH0mS9NlTVrAjIjYFrgBOBLZNKW0VEQcBnVJKNzfnACVJUv1mvLts7ZXKsNv2bZukH0mSpA1BuQuU/gToBpwEpMKxqcC3mmNQkiRJkiRJDVXuNJb/AL6UUloSEasAUkp/j4gdm29okiRJkiRJ667czI6PKQmMRMR2wHtNPiJJkiRJkqRGKDezYyRwd0RcBBARXwBuBH7TXAOTJJXnx4//bX0PQZIkSdqglJvZ8X1gFvBn4HPAm8A7wJXNMyxJkiRJkqSG
WWtmR0RsBOwHXJpSurAwfWV+SimtpakkSZIkSVKLW2tmR0ppFfBISumjwud5BjokSZIkSdKGqtxpLM9HxD7NOhJJkiRJkqQmUO4CpW8DT0bEI8BsoCazI6V0RXMMTJIkSZIkqSHqzeyIiPOKPm4FPEwW5OgAfLHoJUmSJEmStMFYU2bHUODmwvsjU0pbtsB4JEmSJEmSGmVNwY4ZEfFjYCqwcUScBkRppZTSnc01OEmSpMY68t4jm6Sfx77xWJP0I0mSmt+aFig9gWz6yonAxsApwMklr2+Ue6KIuDci5kbEooh4IyLOLCobEBHTImJpRDwbETsXlW0aEXcW2v0jIi4u6bfBbSVJkiRJUv7Um9mRUnoDOBMgIkanlAY08lzXAmeklD6KiC7AmIiYTLb46YOFcz0GXA2MAKp3f/khsDuwM/BvwLMR8VpK6XcR0b6hbRt5LZIkSZIkaQNV1tazTRDoIKU0NaX0UfXHwms3YBAwNaU0MqW0nCxA0bMQEIEso+TqlNIHKaXXgTuAwYWyxrSVJEmSJEk5VFawo6lExK0RsRSYBswFngC6Aq9U10kpLQFmAF0jYmtgh+LywvuuhfeNaStJkiRJknKoRYMdKaVzgC2ASrLpJx8B7YCFJVUXFuq1K/pcWkYj29YSEWdFxISImDBv3rxyL0mSJEmSJG1gWjTYAZBS+iSlNA7oAHwLWAyUbmu7JfBhoYyS8uoyGtm2dFy3p5T6pJT6bLfdduVfkCRJkiRJ2qC0eLCjSGuyNTumAj2rD0bE5tXHU0ofkE136VnUrmehDY1sK0mSJEmScqhFgh0RsX1EnBAR7SKiVUQcTLal7TPAQ0C3iDgmItoAVwCvppSmFZrfA/xXRGxdWHj0m8DwQllj2kqSJEmSpBxqqcyORDZlZQ7wATAMuDCl9EhKaR5wDDC0ULY3cEJR2x+QLTr6NvAccH311rGNaStJkiRJkvKpdUucpBCUOGAN5U8DXeop+wg4vfBq0raSJEmSJCl/1ueaHZIkSZIkSU3OYIckSZIkScoVgx2SJEmSJClXDHZIkiRJkqRcMdghSZIkSZJyxWCHJEmSJEnKFYMdkiRJkiQpVwx2SJIkSZKkXDHYIUmSJEmScsVghyRJkiRJyhWDHZIkSZIkKVcMdkiSJEmSpFxpvb4HIElSfWa8u2x9D0GSJEn/gszskCRJkiRJuWKwQ5IkSZIk5YrBDkmSJEmSlCsGOyRJkiRJUq64QKkkSWVywVRJkqR/DWZ2SJIkSZKkXDHYIUmSJEmScsVpLJK0Dn78+N+arK/vHL5Tk/UlSZIk6VNmdkiSJOn/t3f/wZaX9X3A3x9YCpQfCuVHa6NkusFCMANNaO0kY9CJ8VeyowmZ1lQkhiQyMiR1dGptg0pQGjOOiamlcUghotiMOIPGhcRarTrFqWlJLLY7ApEooEBYdLMusKwRP/3jnG2vN0B3ud97z73Pfb1mznDO9znPs5/Dd+7eu+/7/ACAoQg7AAAAgKFYxgJsClMuPwHWxrZrty26BABggxJ2AACTHau79aQjJxkHAGAlLGMBAAAAhiLsAAAAAIYi7AAAAACGIuwAAAAAhiLsAAAAAIYi7AAAAACGIuwAAAAAhiLsAAAAAIYi7AAAAACGIuwAAAAAhiLsAAAAAIYi7AAAAACGIuwAAAAAhiLsAAAAAIYi7AAAAACGIuwAAAAAhiLsAAAAAIYi7AAAAACGIuwAAAAAhiLsAAAAAIYi7AAAAACGsmXRBQBsVu+88a5FlwAAAEMyswMAAAAYirADAAAAGIqwAwAAABiKsAMAAAAYirADAAAAGMqahB1VdXhVXVVVd1bVnqr6fFW9eEn7j1XVrVX1cFV9qqpOWdb36qr6ZlXdV1WvWzb2k+4LAAAAjGetZnZsSXJ3knOSPCXJm5JcV1XfW1UnJLl+fu34JDcn+eCSvpcmOTXJKUmel+QNVfWiJFlJXwAAAGBMW9biD+nuhzILHva7oaq+nOSH
kvytJDu6+0NJUlWXJnmgqk7r7luTnJ/k57t7V5JdVfW7SV6V5GNJfnoFfQEW6qrbLphknF/4+1dPMg4AAIxiIXt2VNXJSZ6ZZEeSM5Lcsr9tHozckeSMqjouydOWts+fnzF/vpK+y2t6dVXdXFU379y5c2UfEAAAAFiYNQ87quqwJB9Ics189sXRSXYve9vuJMfM27KsfX9bVtj3u3T3ld19dneffeKJJx74BwIAAADWlTUNO6rqkCTvT/KtJBfPLz+Y5Nhlbz02yZ55W5a1729baV8AAABgQGsWdlRVJbkqyclJzu3uv5o37Uhy5pL3HZVka2Z7cexKcu/S9vnzHRP0BQAAAAa0ljM7fifJ6Um2dffeJdc/nORZVXVuVR2R5M1JvjBf4pIk70tySVUdV1WnJfmlJO+doC8AAAAwoDUJO6rqlCQXJjkryX1V9eD88Yru3pnk3CSXJ9mV5NlJXr6k+1sy23T0ziSfSfKO7v5YkqykLwAAADCmtTp69s4k9QTtn0hy2uO07UtywfwxaV8AAABgPAs5ehYAAABgtQg7AAAAgKEIOwAAAIChCDsAAACAoQg7AAAAgKEIOwAAAIChCDsAAACAoQg7AAAAgKEIOwAAAIChCDsAAACAoWxZdAEArMxVt12w6BLg/7rj/r2TjbX1pCMnGwsA2FzM7AAAAACGYmYHAMAB2HbttsnG2n7e9snGAgD+OjM7AAAAgKEIOwAAAIChCDsAAACAodizAwAY2lQnxDgdBgA2DjM7AAAAgKEIOwAAAIChCDsAAACAodizAwBYl6baawMA2HzM7AAAAACGIuwAAAAAhmIZCwDAGtt27bZJxtl+3vZJxgGA0Qg7gFXxzhvvmmSc1//EMyYZBwAA2DwsYwEAAACGIuwAAAAAhiLsAAAAAIYi7AAAAACGIuwAAAAAhiLsAAAAAIYi7AAAAACGIuwAAAAAhiLsAAAAAIYi7AAAAACGIuwAAAAAhrJl0QUAPJF33njXoksAAAA2GDM7AAAAgKEIOwAAAIChCDsAAACAoQg7AAAAgKEIOwAAAIChOI0FAGCD2nbttknG2X7e9knGAYD1wswOAAAAYCjCDgAAAGAowg4AAABgKMIOAAAAYCjCDgAAAGAowg4AAABgKMIOAAAAYCjCDgAAAGAoaxZ2VNXFVXVzVe2rqvcua/uxqrq1qh6uqk9V1SlL2g6vqqur6ptVdV9VvW6qvgAAAMB41nJmxz1J3pbk6qUXq+qEJNcneVOS45PcnOSDS95yaZJTk5yS5HlJ3lBVL1ppXwAAAGBMaxZ2dPf13f2RJF9f1vTTSXZ094e6+5HMAoozq+q0efv5Sd7a3bu6+4tJfjfJqyboCwAAAAxoPezZcUaSW/a/6O6HktyR5IyqOi7J05a2z5+fMUFfAAAAYEDrIew4OsnuZdd2Jzlm3pZl7fvbVtr3u1TVq+d7ity8c+fOg/oAAAAAwPqxHsKOB5Mcu+zasUn2zNuyrH1/20r7fpfuvrK7z+7us0888cSD+gAAAADA+rEewo4dSc7c/6KqjkqyNbO9OHYluXdp+/z5jgn6AgAAAANay6Nnt1TVEUkOTXJoVR1RVVuSfDjJs6rq3Hn7m5N8obtvnXd9X5JLquq4+cajv5TkvfO2lfQFAAAABrSWMzsuSbI3yRuTnDd/fkl370xybpLLk+xK8uwkL1/S7y2ZbTp6Z5LPJHlHd38sSVbSFwAAABjTlrX6g7r70syOhn2stk8kOe1x2vYluWD+mLQvAAAAMJ71sGcHAAAAwGSEHQAAAMBQhB0AAADAUIQdAAAAwFCEHQAAAMBQ1uw0FgAAkjvu3zvZWFtPOnKysQBgJGZ2AAAAAEMxswMAYJPbdu22ScbZft72ScYBgJUyswMAAAAYirADAAAAGIqwAwAAABiKsAMAAAAYig1KAQCYxFQbnSY2OwVgZczsAAAAAIYi7AAAAACGYhkLAMAGdcf9eycZZ+tJR04yDgCsF2Z2
AAAAAEMxswMA4ABMNYsCAFh9ZnYAAAAAQxF2AAAAAEMRdgAAAABDEXYAAAAAQxF2AAAAAEMRdgAAACWFeO0AAAtdSURBVABDEXYAAAAAQxF2AAAAAEMRdgAAAABD2bLoAgAAYLlt126bZJzt522fZBwANhYzOwAAAIChCDsAAACAoQg7AAAAgKEIOwAAAICh2KAUAIBJ3HH/3knG2XrSkZOMA8DmZWYHAAAAMBQzOwAAGJYjbAE2J2EHAMAmN9Xyk5EJTQA2FstYAAAAgKEIOwAAAIChCDsAAACAoQg7AAAAgKEIOwAAAIChCDsAAACAoTh6FgCAdWXKo3C3nnTkZGNNYaojbBPH2AI8ETM7AAAAgKGY2QEAABvQVLNEzBABRiTsAABgWFMuiZnCeltWAzAqYQcAAKyRqcIXoQnAE7NnBwAAADAUYQcAAAAwFMtYAABgE7PRKTAiYQcAALBiQhNgPRk+7Kiq45NcleQFSR5I8q+6+z8utioAAHjy1tspM8l0m6ZOFZokghPYzIYPO5JckeRbSU5OclaSG6vqlu7esdiypuWbAgAAi+SkGWA9GTrsqKqjkpyb5Fnd/WCSm6rqo0lemeSNCy1uYusx3QcAgIM15c+13/+bz59knKkCGL9UhLUzdNiR5JlJHu3u25dcuyXJOQuqBwAA2GCmCmCmCl/WI4EQ683oYcfRSXYvu7Y7yTHL31hVr07y6vnLB6vqtlWubWonZLYnyYrV62uKYZjeZPeYdcn9HZv7Ozb3d2zu79jc34l8caJx6pWT/lvE/R3TKQfyptHDjgeTHLvs2rFJ9ix/Y3dfmeTKtShqNVTVzd199qLrYPW4x2Nzf8fm/o7N/R2b+zs293ds7u/mdsiiC1hltyfZUlWnLrl2ZpKhNicFAAAA/p+hw47ufijJ9Ukuq6qjqupHkrw0yfsXWxkAAACwWoYOO+YuSnJkkvuT/H6S14x27Ozchl2CwwFzj8fm/o7N/R2b+zs293ds7u/Y3N9NrLp70TUAAAAATGYzzOwAAAAANhFhBwAAADAUYccGV1XHV9WHq+qhqrqzqv7ZomtiOlV1cVXdXFX7quq9i66HaVXV4VV11fxrd09Vfb6qXrzouphOVV1bVfdW1Ter6vaq+sVF18S0qurUqnqkqq5ddC1Mq6o+Pb+3D84fty26JqZVVS+vqi/Of46+o6qes+iaWLklX7P7H49W1bsXXRdrb8uiC2DFrkjyrSQnJzkryY1Vdcugm7BuRvckeVuSF2a20S5j2ZLk7iTnJLkryUuSXFdVP9DdX1lkYUzm15P8Qnfvq6rTkny6qj7f3X+y6MKYzBVJ/seii2DVXNzd/2HRRTC9qvrxJL+R5J8m+e9J/s5iK2Iq3X30/udVdVSSv0jyocVVxKKY2bGBzb94z03ypu5+sLtvSvLRJK9cbGVMpbuv7+6PJPn6omthet39UHdf2t1f6e7vdPcNSb6c5IcWXRvT6O4d3b1v/8v5Y+sCS2JCVfXyJH+Z5JOLrgU4aL+W5LLu/tz8e/DXuvtriy6Kyf1MZqdy/tdFF8LaE3ZsbM9M8mh3377k2i1JzlhQPcAKVNXJmX1dm5k1kKr691X1cJJbk9yb5A8XXBITqKpjk1yW5PWLroVV9etV9UBVfbaqnrvoYphGVR2a5OwkJ1bVl6rqq1X176rKLNrx/FyS97UjSDclYcfGdnSS3cuu7U5yzAJqAVagqg5L8oEk13T3rYuuh+l090WZ/b38nCTXJ9n3xD3YIN6a5KruvnvRhbBq/mWSv5fk7ya5Msn2qjIzawwnJzkss9/6PyezpeD/IMkliyyKaVXVMzJbKnzNomthMYQdG9uDSY5ddu3YJHsWUAvwJFXVIUnen9n+OxcvuBxWQXc/Ol9q+D1JXrPoeliZqjoryfOT/Naia2H1dPcfd/ee7t7X3dck+Wxmeyux8e2d//fd3X1vdz+Q5Dfj/o7m/CQ3dfeXF10Ii2GD0o3t9iRbqurU
7v6z+bUzYwo8bBhVVUmuyuy3TC/p7r9acEmsri2xZ8cInpvke5PcNfsSztFJDq2q7+/uH1xgXayuTlKLLoKV6+5dVfXVzO4p4zo/ydsXXQSLY2bHBtbdD2U2Jfqyqjqqqn4kyUsz+w0xA6iqLVV1RJJDM/tB+oiqElKO5XeSnJ5kW3fv/f+9mY2jqk6aH2t4dFUdWlUvTPKzSf7Lomtjxa7MLLQ6a/54T5IbMzs5iwFU1VOr6oX7v+9W1SuS/GiS/7To2pjM7yX55fnf1ccleW2SGxZcExOpqh/ObAmaU1g2Mf9o2vguSnJ1ZrsMfz3Jaxw7O5RLkrxlyevzMts9/NKFVMOkquqUJBdmtofDffPfECfJhd39gYUVxlQ6syUr78nslwt3Jnltd//BQqtixbr74SQP739dVQ8meaS7dy6uKiZ2WGZHv5+W5NHMNhh+WXffttCqmNJbk5yQ2UzpR5Jcl+TyhVbElH4uyfXdbXn/JlY2pgUAAABGYhkLAAAAMBRhBwAAADAUYQcAAAAwFGEHAAAAMBRhBwAAADAUYQcAAAAwFGEHADCpqvpKVT1/gnFeUFUfWUH/V1TVx1daxxSq6leq6u2LrgMANgthBwCwXv2bJE86IOjuD3T3Cyas5wlV1ZVVdVtVfaeqXrWs+cok51XVSWtVDwBsZsIOAGDdqap/mOQp3f25J9l/y8Ql7R/301X13MdpviXJRUn+dHlDdz+S5I+SnL8adQEA303YAQCsiqo6vKreVVX3zB/vqqrDl7S/oarunbf9YlV1VX3fvPnFST6zbLyeLwf586p6oKreUVWHzNteVVWfrarfqqpvJLl0fu2mJf3PqKr/XFXfqKq/qKp/Pb9+SFW9saruqKqvV9V1VXX8wX7e7r6iuz+Z5JHHecunk/zEwY4LABw8YQcAsFp+Nck/TnJWkjOT/KMklyRJVb0oyeuSPD/J9yU5Z1nfH0hy22OM+VNJzk7yg0lemuSCJW3PTvLnSU5KcvnSTlV1TJJPJPlYkqfN/8xPzpt/JcnL5jU8LcmuJFcc5Gc9EF/M7P8DALDKhB0AwGp5RZLLuvv+7t6Z5NeSvHLe9k+S/F537+juh+dtSz01yZ7HGPM3uvsb3X1Xkncl+dklbfd097u7+9vdvXdZv59Mcl93v7O7H+nuPd39x/O2C5P8and/tbv3Jbk0yc+swlKYPUmeMvGYAMBjWJX1rAAAmc2SuHPJ6zvn1/a33byk7e5lfXclOeYxxlz6vqXjPdYYSz09yR2P03ZKkg9X1XeWXHs0yclJvlZVf7nk+tFJbqiqb89fv727D3QT1WOS7D7A9wIAK2BmBwCwWu7JLEjY7xnza0lyb5LvWdL29GV9v5DkmY8x5tL3LR0vSfoJark7ydYnaHtxdz91yeOI7v5akiy9nuSmJD+55NrBnBZzemabmAIAq0zYAQCslt9PcklVnVhVJyR5c5Jr523XJfn5qjq9qv7mvG2pP8xf38cjSf5FVR1XVU9P8s+TfPAAa7khyd+uqtfON049pqqePW97T5LLq+qUJJnX+9ID/pRzVfU3quqIJJXksKo6Yv8GqnPnZHYiCwCwyoQdAMBqeVtmS1W+kOR/ZXYk69uSpLv/KMm/TfKpJF9K8t/mffbN2/80ye4lgcR+f5DkT5L8zyQ3JrnqQArp7j1JfjzJtiT3JfmzJM+bN/92ko8m+XhV7Unyucw2Oz1YH0+yN8kPJ7ly/vxHk2QegrwkyTVPYlwA4CBV9xPN+AQAWH1VdXqS/53k8O7+9vzaC5Jc1N0vm7/uJKd295cWV+mTU1W/nOTp3f2GRdcCAJuBsAMAWIiq+qnMZmccldmMh+/sDzYe5/0bNuwAANaWZSwAwKJcmGRnZqekPJrkNYstBwAYhZkdAAAAwFDM7AAAAACGIuwAAAAAhiLsAAAAAIYi7AAAAACGIuwAAAAAhiLsAAAAAIbyfwAUX9fMalRQsAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x23a0b817208>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "fig, ax = plt.subplots(figsize=(18,8))\n",
    "ax.hist(np.log(shipping_fee_by_seller+1), color='#8CB4E1', alpha=1.0, bins=50,\n",
    "       label='Price when Seller pays Shipping')\n",
    "ax.hist(np.log(shipping_fee_by_buyer+1), color='#007D00', alpha=0.7, bins=50,\n",
    "       label='Price when Buyer pays Shipping')\n",
    "plt.xlabel('log(price+1)', fontsize=12)\n",
    "plt.ylabel('frequency', fontsize=12)\n",
    "plt.title('Price Distribution by Shipping Type', fontsize=15)\n",
    "plt.tick_params(labelsize=12)\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It is obvious that the average price is higher when buyer pays shipping."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Category Names"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Category names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 1265 unique values in category name column\n"
     ]
    }
   ],
   "source": [
    "print('There are', train['category_name'].nunique(), 'unique values in category name column')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Top 10 most common category names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Women/Athletic Apparel/Pants, Tights, Leggings                 48028\n",
       "Women/Tops & Blouses/T-Shirts                                  37173\n",
       "Beauty/Makeup/Face                                             27438\n",
       "Beauty/Makeup/Lips                                             23949\n",
       "Electronics/Video Games & Consoles/Games                       21265\n",
       "Beauty/Makeup/Eyes                                             20133\n",
       "Electronics/Cell Phones & Accessories/Cases, Covers & Skins    19708\n",
       "Women/Underwear/Bras                                           17065\n",
       "Women/Tops & Blouses/Tank, Cami                                16225\n",
       "Women/Tops & Blouses/Blouse                                    16200\n",
       "Name: category_name, dtype: int64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['category_name'].value_counts()[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Item condition vs. Price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x23a2c337d30>"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEMCAYAAADHxQ0LAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAGPRJREFUeJzt3X+U1fV95/HXC4YIUq2io3VUhCEpGoM/pyYVdcFoNtA0bXazrdAkPWm2YDdNlaYR292uG3t2jx7X2uScnAZq0qabjF01uqdJMBEVENtEHRRERFIZlJqBMDqgKBaEee8f8x3mB3C5d5jP/d57v8/HOXO4nzvf+/2875fhxWc+9/v9fB0RAgA0vjF5FwAAqA4CHwAKgsAHgIIg8AGgIAh8ACgIAh8ACiJp4NteZHuD7edt32N7fMr+AABHlizwbZ8p6Y8ktUXEBySNlXRdqv4AAKWlntJpkjTBdpOk4yV1Je4PAHAEyQI/In4m6X9L2ippm6Q3IuLhVP0BAEprSrVj2ydL+g1JUyXtknSf7U9FxLeHbbdA0gJJmjhx4qXnnntuqpIAoOGsWbPmtYhoLmfbZIEv6RpJWyKiW5JsPyDpcklDAj8ilkpaKkltbW3R0dGRsCQAaCy2Xyl325Rz+Fslfcj28bYt6cOSNibsDwBQQso5/Ccl3S/pGUnrs76WpuoPAFBayikdRcQtkm5J2QcAoDxcaQsABUHgD7Jq1SrNnTtXq1evzruU3G3evFmf/OQn1dnZmXcpueNYoFEQ+IPceeedkqQ77rgj50ryd8cdd2jPnj0cC3Es0DgI/MyqVau0f/9+SdL+/fsLPcrfvHmztm7dKkl65ZVXCj2y5VigkRD4mf7Rfb8ij+aGv3eOxZHbQD0h8DP9o/sjtYukf0Tb75VXyr6uo+FwLNBICHwc4rjjjivZLpIzzzxzSPuss87KqRLg2BH4mb6LgY/cLpK9e/eWbBfJ1KlTS7aBekLgZyKiZBvFtGbNmiFt1npCPSPwgRImTZpUsg3UEwIfKGH79u0l20A9IfABoCAIfKAEzlhCIyHwgRL27NlTsg3UEwIfKGHs2LEl20A9IfCBEg4cOFCyXTQ9PT266aab1NPTk3cpGAECH0DZ2tvbtWHDBt1zzz15l4IRSBb4tqfbXjvo603bN6bqD0BaPT09Wr58uSJCDz/8MKP8OpTynrabIuKiiLhI0qWS9kh6MFV/ANJqb28fsoQ4o/z6U60pnQ9L2hwRLDUI1KkVK1YcXHIkIvTYY4/lXBEqVa3Av04SwwGgjjU3Nw9pn3baaTlVgpFKHvi23yPp45LuO8L3F9jusN3R3d2duhwAIzT83+eOHTtyqgQjVY0R/hxJz0TEzw/3zYhYGhFtEdE2fAQBoHbMnj374LLhtnX11VfnXBEqVY3Anyemc4C6N3/+fDU1NUmSmpqaNG/evJwrQqWSBr7t4yVdK+mBlP0ASG/SpEm68sorJUlXXXUVS0XXoaaUO4+IPZJOSdkHgOop8p3gGgFX2gIoS09Pj1avXi1JWr16NRde1SECH0BZ2tvb1dvbK0nq7e3lwqs6ROADKMvKlSuHXGm7YsWKnCtCpQh8AGWZNWvWkNMyZ8+enXNFqBSBD6Asc+bMGbK0wpw5c3KuCJUi8AGU5aGHHirZRu0j8AGUZfhiaY8++mhOlWCkCHwAZeF2j/WPwAdQlrfffrtkG7WPwAeAgiDwAZRlzJgxJduoffyNASjL5ZdfPqQ9c+bMnCrBSBH4AMqyb9++km3UPgIfQFmefvrpIe2nnnoqp0owUgQ+gLL0X2V7pDZqH4EPAAVB4AMoS//tDfuNGzcup0owUqlvcXiS7fttv2h7o+1fTdkfgHT6l0bu9+677+ZUSW3o6enRTTfdVFc3gkk9wv+KpB9GxLmSLpS0MXF/AFAV7e3t2rBhQ13dCCZZ4Ns+UdJVkr4hSRGxLyJ2
peoPAKqlp6dHjzzyiCJCy5cvr5tRfsoRfqukbkl/a/tZ23fbnpiwPwCoivb2dh04cECSdODAgboZ5acM/CZJl0j664i4WNLbkm4evpHtBbY7bHd0d3cnLAcARsfKlSuHBH693O4xZeC/KunViHgya9+vvv8AhoiIpRHRFhFtzc3NCcsBgNFxySVDo+zSSy/NqZLKJAv8iNgu6V9tT8+e+rCkF1L1BwDV8tJLL5Vs16qmo29yTL4g6Tu23yOpU9JnE/cHAMlt3759SHvbtm05VVKZpIEfEWsltaXsAwCqzfaQpSVs51hN+bjSFgAqNHxp6CuuuCKnSipD4ANAha6//voh7YULF+ZUSWUIfACo0M6dO4e0d+2qj2tKCXwAqNCtt95asl2rCHwAqNDwi0R37NiRUyWVIfABoCAIfAAoCAIfAAqCwAeACo0fP75ku1YR+ABQoQsuuGBI+8ILL8ypksoQ+ABQoXXr1g1pr127NqdKKkPgA0CFht/Pt17u70vgA0CFent7S7ZrFYEPAAWRej18AKh5S5YsUWdn5zHtY/HixWVt19ramttia4zwAaAgGOEDKLxKR9zXX3+9tm7derB9zjnn6Pbbbx/tskZd0hG+7Zdtr7e91nZHyr4AoFq+9KUvlWzXqmqM8GdHxGtV6AcAqmLatGk67rjjtHfvXp1zzjlqbW3Nu6SyMIcPACNw9tlna8yYMXUzupfSj/BD0sO2Q9KSiFiauL+DivKpO4B8TJgwQeeff37djO6l9IE/MyK6bJ8mabntFyPi8cEb2F4gaYEkTZ48OXE5AFBcSQM/IrqyP3fYflDSZZIeH7bNUklLJamtrS1Gq+9KR9xz58495Ll6+NQdAMqVbA7f9kTbJ/Q/lvQRSc+n6g8AUFrKD21Pl/SE7XWSnpL0g4j4YcL+jsmyZctKtgGg3iWb0omITkn1sUg0ABQAV9oOMmPGDEmNN3fPGUsAJM7DB4DCYIRfAJyxNIDfdlBkjPABoCAY4eMQy5YtGzLKb6QzlvhtZwC/7RQPI3wAKAhG+DisRj1jqVL8tjOgkX/bKQpG+ADKwsWJ9Y/AB45ixowZmjFjBgGHukfgAygb//nVNwIfAAqCwAeAgiDwAaAgCHwAKAgCHwAKgsAHgIIoO/Btn2P7muzxhP7bFwIA6kNZgW/79yXdL2lJ9tRZkv5fma8da/tZ298fWYkAgNFQ7gj/85JmSnpTkiLiXySdVuZrb5C0sfLSAACjqdzA3xsR+/obtpskxdFeZPssSb8m6e6RlQcAGC3lBv4q238maYLtayXdJ+l7ZbzuryTdJKl3hPUBAEZJuYF/s6RuSeslLZS0TNJ/K/UC2x+TtCMi1hxluwW2O2x3dHd3l1kOAKBS5a6HP0HSNyPib6S+D2Kz5/aUeM1MSR+3PVfSeEkn2v52RHxq8EYRsVTSUklqa2s76jQRAGBkyh3hP6q+gO83QdIjpV4QEX8aEWdFxBRJ10l6bHjYAwCqp9zAHx8Rb/U3ssfHpykJAJBCuYH/tu1L+hu2L5X0TrmdRMTKiPhYpcUBAEZPuXP4N0q6z3ZX1j5D0m+nKQkAkEJZgR8RT9s+V9J0SZb0YkS8m7QyAMCoKhn4tq+OiMds/4dh33qfbUXEAwlrAwCMoqON8P+dpMck/fphvheSCHwAqBMlAz8ibrE9RtJDEXFvlWoCACRw1Dn8iOi1/YeSCHwANW3JkiXq7OysSl/9/SxevLgq/bW2tmrhwoXHtI9yz9JZbvtPJP1fSW/3PxkRPcfUOwCMos7OTj33wibp+EnpO9vXtzDAcy9XYUmYPaMTteUG/u+pb87+vwx7vnVUqgCA0XL8JMX5c/KuYlR5w0Ojsp9yA//96gv7K9QX/KslfX1UKgAAVEW5gf8t9d385KtZe1723G+lKGow5uQAYHSUG/jTI+LCQe0VttelKGi4zs5ObVq/Xs1jq3C/9d6+Zft7XtiQvKvuA5XfIoD//AZwLIDKlRv4z9r+
UET8RJJsf1DSP6Ura6jmsWP0yeOPq1Z3VXH/nr0Vv6azs1Obf/qiJjefnKCiod7jvv+Q3t358+R9be3eWfFrOjs7tXnzZk2dOjVBRUMdd1zfz15vb/r7+GzZsiV5HyiucgP/g5I+Y3tr1p4saaPt9ZIiIi5IUh0OMbn5ZP3Zb30k7zJG1f+69+ERvW7q1Km69S/+YpSrydd///M/r/g1/LaDcpUb+B9NWgWAEevs7NSmf3lJp/zSmcn7ijF9kfHa7rIXyx2x17f/LHkfRVPu4mmvpC4EwMid8ktn6tf/8w15lzGqvnf3V/IuoeFU4ZNQAEAtIPABoCCSBb7t8bafsr3O9gbbX07VFwDg6Mr90HYk9kq6OiLesj1O0hO2H+o/tRMAUF3JAj8iQlL/jc/HZV+Rqj8AQGkpR/iyPVbSGknvlfS1iHgyZX8Aiq2rq0va8+aoLTZWM/a8rq6uY7+rbNLAj4gDki6yfZKkB21/ICKeH7yN7QWSFkjS5MmTD9lHV1eXdh/oHdGVqbWs+0Cv/q2r6+gbAsAoSRr4/SJil+2V6ruA6/lh31sqaakktbW1MeUDYMRaWlr02r5xDbk8cktL8zHvJ1ng226W9G4W9hMkXSPp9kr309LSop5dOxtyLZ1JLS0Vvaarq0t7dr854qUIatUrO3bq+HcO5F0G0PBSjvDPkPStbB5/jKR7I+L7CftDgXR1demdd94Z0doztWzLli2aMGFC3mWgQaU8S+c5SRen2n8RtbS06N2dYxty8bRxJ5+edxlAw6vKHD4w2lpaWtTb29uQq2WOGVPZ9ZBdXV3a/dbbDbf2zOvbXtW+3RPzLqOhsLQCABQEI3ygzrW0tOi13e805GqZp57A5xmjiRE+ABQEgQ8ABUHgA0BBEPgAUBB8aFtntnbvrMqVtj/ftVuSdPpJJyTva2v3Tk0bwXn4W7ZsqcqFV9u2bZMknXHGGcn72rJli6ZNm5a8n4a2p6c6i6f925t9f44/MX1fe3ok1fDSChh9ra2tVetr3863JakqF0RNO/n0it9bNY/F3r19C/dVen78SEybNq2q763RVPPYdXb2DYpapxx7EB9d86i8NwK/jixcuLBqfS1evFiSdPvtFS9/VBUcCxwOPxel1UXgd1dpeeRdvb2SpJOqMJLrPtCrScl7QVG8vv1nVbnS9o3XuyVJv3hK+lHt69t/plNPeG/yfoqk5gO/mr+i7erslCRNqkKfk1Td94bGVc2foze790tSVS6IOvWE9/JvZJTVfODzKxpQGv9GUC5OywSAgiDwAaAgCHwAKAgCHwAKIlng2z7b9grbG21vsN1Ya7cCQJ1JeZbOfklfjIhnbJ8gaY3t5RHxQsI+AQBHkGyEHxHbIuKZ7PFuSRslnZmqPwBAaVWZw7c9RX03NH+yGv0BAA6VPPBt/4Kk70q6MSLePMz3F9jusN3R3d2duhwAKKykgW97nPrC/jsR8cDhtomIpRHRFhFtzc3VWHUOAIop5Vk6lvQNSRsj4i9T9QMAKE/KEf5MSZ+WdLXttdnX3IT9AQBKSHZaZkQ8Icmp9g8AqAxX2gJAQRD4AFAQBD4AFASBDwAFQeADQEEQ+ABQEAQ+ABQEgQ8ABUHgA0BBEPgAUBCOiLxrOKitrS06OjpGZV9LlixRZ2dnRa/p3761tbWi17W2tmrhwoUVvaaaOBYDOBYDOBYD6vlY2F4TEW3lbJvyFod1Z/z48XmXUDM4FgM4FgM4FgPq8Vg07AgfAIqgkhE+c/gAUBAEPgAUBIEPAAVB4ANAQaS8p+03be+w/XyqPgAA5Us5wv87SR9NuH8AQAWSBX5EPC6pJ9X+AQCVYQ4fAAoi98C3vcB2h+2O7u7uvMsBgIaVe+BHxNKIaIuItubm5rzLAYCGlXvgAwCqI+VpmfdI+rGk6bZftf25VH0BAI4u2WqZETEv1b4BAJVjSgcACoLAB4CCIPABoCAIfAAoCAIfAAqCwAeAgiDwAaAgCHwAKAgC
HwAKgsAHgIIg8AGgIAh8ACgIAh8ACoLAB4CCIPABoCAIfAAoCAIfAAoi2R2vJMn2RyV9RdJYSXdHxG0p+ztWc+fOPfh42bJlOVaSP47FAI7FgEWLFmnTpk0677zzdOedd+ZdTq56enp022236eabb9akSZPyLqcsKe9pO1bS1yTNkfR+SfNsvz9VfwDS27RpkyRp48aNOVeSv/b2dm3YsEH33HNP3qWULeWUzmWSXoqIzojYJ+kfJP1Gwv6OyeBR3OHaRcKxGMCxGLBo0aIh7S9+8Ys5VZK/np4ePfLII4oILV++XD09PXmXVJaUgX+mpH8d1H41ew5AHeof3fcr8ii/vb1dvb29kqTe3t66GeWnDHwf5rk4ZCN7ge0O2x3d3d0JywGA0bFy5Urt379fkrR//36tWLEi54rKkzLwX5V09qD2WZK6hm8UEUsjoi0i2pqbmxOWAwCjY9asWWpq6jvnpampSbNnz865ovKkDPynJb3P9lTb75F0naR/TNgfgISmT58+pH3eeeflVEn+5s+frzFj+uJzzJgxmjdvXs4VlSdZ4EfEfkl/KOlHkjZKujciNqTq71gNP92uyKffcSwGcCwG3HXXXUPaRT4tc9KkSbrmmmtkW9dee23dnJaZ9Dz8iFgmqbj/QoAGM3369IPn4Rfd/PnztXXr1roZ3UuSIw75HDU3bW1t0dHRkXcZAFA3bK+JiLZytmVpBQAoCAIfAAqCwAeAgiDwAaAgaupDW9vdkl7JuYxTJb2Wcw21gmMxgGMxgGMxoBaOxTkRUdZVqzUV+LXAdke5n3g3Oo7FAI7FAI7FgHo7FkzpAEBBEPgAUBAE/qGW5l1ADeFYDOBYDOBYDKirY8EcPgAUBCN8ACgIAj9j+5u2d9h+Pu9a8mb7bNsrbG+0vcH2DXnXlBfb420/ZXtddiy+nHdNebI91vaztr+fdy15s/2y7fW219qui0XAmNLJ2L5K0luS/j4iPpB3PXmyfYakMyLiGdsnSFoj6Tcj4oWcS6s625Y0MSLesj1O0hOSboiIn+RcWi5s/7GkNkknRsTH8q4nT7ZfltQWEXmfh182RviZiHhcUn3ciTixiNgWEc9kj3er734GhbwfcfR5K2uOy74KOUqyfZakX5N0d961YGQIfJRke4qkiyU9mW8l+cmmMdZK2iFpeUQU9Vj8laSbJPXmXUiNCEkP215je0HexZSDwMcR2f4FSd+VdGNEvJl3PXmJiAMRcZH67st8me3CTfnZ/pikHRGxJu9aasjMiLhE0hxJn8+mhWsagY/DyuarvyvpOxHxQN711IKI2CVppaSP5lxKHmZK+ng2b/0Pkq62/e18S8pXRHRlf+6Q9KCky/Kt6OgIfBwi+6DyG5I2RsRf5l1Pnmw32z4pezxB0jWSXsy3quqLiD+NiLMiYoqk6yQ9FhGfyrms3NiemJ3QINsTJX1EUs2f4UfgZ2zfI+nHkqbbftX25/KuKUczJX1afaO4tdnX3LyLyskZklbYfk7S0+qbwy/8KYnQ6ZKesL1O0lOSfhARP8y5pqPitEwAKAhG+ABQEAQ+ABQEgQ8ABUHgA0BBEPgAUBAEPgAUBIGP3Nj+5+zPKbbn511PpbK6n88et9n+avZ4lu3LB213ve3PjGK/B/s6zPdetn3qaPWFxtKUdwEorojoD8UpkuZLas+vmmMTER2S+tdEn6W+pbb/Ofve1xP2BZSNET5yY7t/2eHbJF2ZXdG7KFud8g7bT9t+zvbCbPtZtlfZvtf2T23fZvt3shuUrLc9rURfp9t+MLuRybr+EbjtP7b9fPZ1Y/bclOzmL3+T3fTk4WxZBdm+NHv9jyV9ftD+Z9n+fra66PWSFmXv50rb/8P2n2TbXWT7J9n7etD2ydnzK23fnr2Xn9q+ssR7mdV/AxLbp2T1PWt7iSSP7G8DRUDgoxbcLGl1RFwUEXdJ+pykNyLiVyT9iqTftz012/ZCSTdImqG+5R9+OSIuU98a7V8o0cdX
Ja2KiAslXSJpg+1LJX1W0gclfSjr5+Js+/dJ+lpEnC9pl6T/mD3/t5L+KCJ+9XCdRMTLkr4u6a7s/awetsnfS1ocERdIWi/plkHfa8rey43Dni/lFklPRMTFkv5R0uQyX4cCIvBRiz4i6TPZGvRPSjpFfQEsSU9nN2jZK2mzpIez59erb2roSK6W9NfSweWO35B0haQHI+Lt7CYnD0jqH1lviYi12eM1kqbY/kVJJ0XEquz5/1PJmzrM678lafCSuv2rkq45ynsZ7CpJ387e1w8k7aykJhQLc/ioRZb0hYj40ZAn7VmS9g56qndQu1eV/zyXmv4Y3M8BSROy7VMuPtXf5wFV9l5YEAtlYYSPWrBb0gmD2j+S9AfZmvyy/cvZErTH4lFJf5Dtb6ztEyU9Luk3bR+f7f8TkoZPwRyUrYf/hu0rsqd+5wibDn8//a9/Q9LOQfPzn5a0avh2FXq8vw7bcySdfIz7QwMj8FELnpO0P/swdJH65uNfkPRMdtrjEh37b6M3SJpte736pkzOz+7b+3fqW972SUl3R8SzR9nPZyV9LfvQ9p0jbPM9SZ/o/9B22Pd+V9Id2XLLF0m6dUTvZsCXJV1l+xn1TYVtPcb9oYGxPDIAFAQjfAAoCD60RUOx/V8l/adhT98XEf8zj3qOhe1/L+n2YU9viYhP5FEP6h9TOgBQEEzpAEBBEPgAUBAEPgAUBIEPAAVB4ANAQfx/Oi/mPdQ1LakAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x23a17a4c9b0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x = 'item_condition_id', y = np.log(train['price']+1), data = train, palette = sns.color_palette('RdBu',5))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There seems to be various on the average price between each item condition id."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Therefore, we are going to use all the features to build our model."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LightGBM"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "NUM_BRANDS = 4000\n",
    "NUM_CATEGORIES = 1000\n",
    "NAME_MIN_DF = 10\n",
    "MAX_FEATURES_ITEM_DESCRIPTION = 50000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There are missing values in the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 5083 items that do not have a category name.\n"
     ]
    }
   ],
   "source": [
    "print('There are %d items that do not have a category name.' %train['category_name'].isnull().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 506370 items that do not have a brand name.\n"
     ]
    }
   ],
   "source": [
    "print('There are %d items that do not have a brand name.' %train['brand_name'].isnull().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 3 items that do not have a description.\n"
     ]
    }
   ],
   "source": [
    "print('There are %d items that do not have a description.' %train['item_description'].isnull().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "def handle_missing_inplace(dataset): \n",
    "    dataset['category_name'].fillna(value='missing', inplace=True) \n",
    "    dataset['brand_name'].fillna(value='missing', inplace=True) \n",
    "    dataset['item_description'].replace('No description yet,''missing', inplace=True) \n",
    "    dataset['item_description'].fillna(value='missing', inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cutting(dataset):\n",
    "    pop_brand = dataset['brand_name'].value_counts().loc[lambda x: x.index != 'missing'].index[:NUM_BRANDS]\n",
    "    dataset.loc[~dataset['brand_name'].isin(pop_brand), 'brand_name'] = 'missing'\n",
    "    pop_category = dataset['category_name'].value_counts().loc[lambda x: x.index != 'missing'].index[:NUM_CATEGORIES]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "def to_categorical(dataset):\n",
    "    dataset['category_name'] = dataset['category_name'].astype('category')\n",
    "    dataset['brand_name'] = dataset['brand_name'].astype('category')\n",
    "    dataset['item_condition_id'] = dataset['item_condition_id'].astype('category')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('train.tsv', sep = '\\t')\n",
    "msk = np.random.rand(len(df)) < 0.8\n",
    "train = df[msk]\n",
    "test = df[~msk]\n",
    "test_new = test.drop('price', axis=1)\n",
    "y_test = np.log1p(test[\"price\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Drop rows where price = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = train[train.price != 0].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "nrow_train = train.shape[0]\n",
    "y = np.log1p(train[\"price\"])\n",
    "merge: pd.DataFrame = pd.concat([train, test_new])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "handle_missing_inplace(merge)\n",
    "cutting(merge)\n",
    "to_categorical(merge)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand_name</th>\n",
       "      <th>category_name</th>\n",
       "      <th>item_condition_id</th>\n",
       "      <th>item_description</th>\n",
       "      <th>name</th>\n",
       "      <th>price</th>\n",
       "      <th>shipping</th>\n",
       "      <th>train_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Razer</td>\n",
       "      <td>Electronics/Computers &amp; Tablets/Components &amp; P...</td>\n",
       "      <td>3</td>\n",
       "      <td>This keyboard is in great condition and works ...</td>\n",
       "      <td>Razer BlackWidow Chroma Keyboard</td>\n",
       "      <td>52.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Target</td>\n",
       "      <td>Women/Tops &amp; Blouses/Blouse</td>\n",
       "      <td>1</td>\n",
       "      <td>Adorable top with a hint of lace and a key hol...</td>\n",
       "      <td>AVA-VIV Blouse</td>\n",
       "      <td>10.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>missing</td>\n",
       "      <td>Home/Home Décor/Home Décor Accents</td>\n",
       "      <td>1</td>\n",
       "      <td>New with tags. Leather horses. Retail for [rm]...</td>\n",
       "      <td>Leather Horse Statues</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>missing</td>\n",
       "      <td>Women/Other/Other</td>\n",
       "      <td>3</td>\n",
       "      <td>Banana republic bottoms, Candies skirt with ma...</td>\n",
       "      <td>Bundled items requested for Ruie</td>\n",
       "      <td>59.0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Acacia Swimwear</td>\n",
       "      <td>Women/Swimwear/Two-Piece</td>\n",
       "      <td>3</td>\n",
       "      <td>Size small but straps slightly shortened to fi...</td>\n",
       "      <td>Acacia pacific tides santorini top</td>\n",
       "      <td>64.0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        brand_name                                      category_name  \\\n",
       "1            Razer  Electronics/Computers & Tablets/Components & P...   \n",
       "2           Target                        Women/Tops & Blouses/Blouse   \n",
       "3          missing                 Home/Home Décor/Home Décor Accents   \n",
       "5          missing                                  Women/Other/Other   \n",
       "6  Acacia Swimwear                           Women/Swimwear/Two-Piece   \n",
       "\n",
       "  item_condition_id                                   item_description  \\\n",
       "1                 3  This keyboard is in great condition and works ...   \n",
       "2                 1  Adorable top with a hint of lace and a key hol...   \n",
       "3                 1  New with tags. Leather horses. Retail for [rm]...   \n",
       "5                 3  Banana republic bottoms, Candies skirt with ma...   \n",
       "6                 3  Size small but straps slightly shortened to fi...   \n",
       "\n",
       "                                 name  price  shipping  train_id  \n",
       "1    Razer BlackWidow Chroma Keyboard   52.0         0         1  \n",
       "2                      AVA-VIV Blouse   10.0         1         2  \n",
       "3               Leather Horse Statues   35.0         1         3  \n",
       "5    Bundled items requested for Ruie   59.0         0         5  \n",
       "6  Acacia pacific tides santorini top   64.0         0         6  "
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merge.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Count-vectorize the `name` and `category_name` columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Token counts for the item names; the minimum document frequency is NAME_MIN_DF.\n",
    "cv = CountVectorizer(\n",
    "    min_df=NAME_MIN_DF,\n",
    ")\n",
    "X_name = cv.fit_transform(merge.loc[:, 'name'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Token counts for the category strings, using the vectorizer's default settings.\n",
    "# Note that this rebinds `cv`, so the name vectorizer is no longer reachable afterwards.\n",
    "cv = CountVectorizer()\n",
    "X_category = cv.fit_transform(merge.loc[:, 'category_name'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "TF-IDF vectorize the `item_description` column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TF-IDF over 1- to 3-grams of the description, English stop words removed,\n",
    "# vocabulary capped at MAX_FEATURES_ITEM_DESCRIPTION entries.\n",
    "tv = TfidfVectorizer(\n",
    "    stop_words='english',\n",
    "    ngram_range=(1, 3),\n",
    "    max_features=MAX_FEATURES_ITEM_DESCRIPTION,\n",
    ")\n",
    "X_description = tv.fit_transform(merge.loc[:, 'item_description'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Label-binarize the `brand_name` column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One indicator column per brand label, returned as a sparse matrix.\n",
    "lb = LabelBinarizer(\n",
    "    sparse_output=True,\n",
    ")\n",
    "X_brand = lb.fit_transform(merge.loc[:, 'brand_name'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create dummy variables for the `item_condition_id` and `shipping` columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the dummy columns with pandas, then wrap the resulting values as a CSR matrix.\n",
    "X_dummies = csr_matrix(\n",
    "    pd.get_dummies(\n",
    "        merge.loc[:, ['item_condition_id', 'shipping']],\n",
    "        sparse=True,\n",
    "    ).values\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Stack all feature matrices horizontally into one sparse matrix (`sparse_merge`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column-wise concatenation of every feature block, converted to CSR.\n",
    "sparse_merge = hstack(\n",
    "    [X_dummies, X_description, X_brand, X_category, X_name]\n",
    ").tocsr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1482535, 72759)"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sparse_merge.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Remove features with document frequency <= 1, i.e. columns that are non-zero in at most one row."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Boolean column mask: True where a column has stored entries in more than one row.\n",
    "mask = sparse_merge.getnnz(axis=0) > 1\n",
    "sparse_merge = sparse_merge[:, mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1482535, 72293)"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sparse_merge.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Separate train and test data from sparse merge."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The first nrow_train rows are the training rows; everything after them is test.\n",
    "X, X_test = sparse_merge[:nrow_train], sparse_merge[nrow_train:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# LightGBM dataset built from the training features and the log1p target.\n",
    "train_X = lgb.Dataset(data=X, label=y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Booster settings handed to lgb.train below.\n",
    "params = dict(\n",
    "    learning_rate=0.75,\n",
    "    application='regression',\n",
    "    max_depth=3,\n",
    "    num_leaves=100,\n",
    "    verbosity=-1,\n",
    "    metric='RMSE',\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Training\n",
    "Training a model requires a parameter dictionary and a dataset. Training will take a while."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit the booster for a fixed 3200 rounds on the training dataset.\n",
    "# No validation set is passed, so there is no per-round evaluation to log;\n",
    "# `verbose_eval` is therefore omitted (it was removed from `lgb.train` in LightGBM 4.0).\n",
    "gbm = lgb.train(params, train_set=train_X, num_boost_round=3200)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predictions for the held-out rows, on the same log1p scale the model was trained on.\n",
    "y_pred = gbm.predict(data=X_test, num_iteration=gbm.best_iteration)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The rmse of prediction is: 0.46164222941613137\n"
     ]
    }
   ],
   "source": [
    "# Root-mean-squared error between the held-out targets and the predictions.\n",
    "# `mean_squared_error` is already imported in the notebook's first cell, so it is not re-imported here.\n",
    "# NOTE(review): assumes `y_test` is on the same log1p scale as `y_pred` - confirm where it is defined.\n",
    "print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
